Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -1064,6 +1064,8 @@ to trap and to be properly aligned. This is not a valid attribute for return values. +.. _attr_align: + ``align `` This indicates that the pointer value may be assumed by the optimizer to have the specified alignment. @@ -10339,9 +10341,9 @@ :: declare void @llvm.memcpy.p0i8.p0i8.i32(i8* , i8* , - i32 , i32 , i1 ) + i32 , i1 ) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* , i8* , - i64 , i32 , i1 ) + i64 , i1 ) Overview: """"""""" @@ -10350,7 +10352,7 @@ source location to the destination location. Note that, unlike the standard libc function, the ``llvm.memcpy.*`` -intrinsics do not return a value, takes extra alignment/isvolatile +intrinsics do not return a value, takes extra isvolatile arguments and the pointers can be in specified address spaces. Arguments: """""""""" @@ -10358,13 +10360,11 @@ The first argument is a pointer to the destination, the second is a pointer to the source. The third argument is an integer argument -specifying the number of bytes to copy, the fourth argument is the -alignment of the source and destination locations, and the fifth is a +specifying the number of bytes to copy, and the fourth is a boolean indicating a volatile access. -If the call to this intrinsic has an alignment value that is not 0 or 1, -then the caller guarantees that both the source and destination pointers -are aligned to that boundary. +The :ref:`align <attr_align>` parameter attribute can be provided +for the first and second arguments. If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy`` call is a :ref:`volatile operation `. The detailed access behavior is not @@ -10379,6 +10379,25 @@ to be aligned to some boundary, this can be specified as the fourth argument, otherwise it should be set to 0 or 1 (both meaning no alignment). 
+History +""""""" + +In LLVM 6.0 and earlier, the prototype for ``llvm.memcpy`` included an +argument for the alignment of the pointer arguments, rather than using +the align parameter attribute: + +:: + + declare void @llvm.memcpy.p0i8.p0i8.i32(i8* , i8* , + i32 , i32 , i1 ) + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* , i8* , + i64 , i32 , i1 ) + +This fourth argument is the alignment of the source and destination locations. +If the call to this intrinsic has an alignment value that is not 0 or 1, +then the caller guarantees that both the source and destination pointers +are aligned to that boundary. + .. _int_memmove: '``llvm.memmove``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10394,9 +10413,9 @@ :: declare void @llvm.memmove.p0i8.p0i8.i32(i8* , i8* , - i32 , i32 , i1 ) + i32 , i1 ) declare void @llvm.memmove.p0i8.p0i8.i64(i8* , i8* , - i64 , i32 , i1 ) + i64 , i1 ) Overview: """"""""" @@ -10407,21 +10426,19 @@ overlap. Note that, unlike the standard libc function, the ``llvm.memmove.*`` -intrinsics do not return a value, takes extra alignment/isvolatile -arguments and the pointers can be in specified address spaces. +intrinsics do not return a value, takes an extra isvolatile +argument and the pointers can be in specified address spaces. Arguments: """""""""" The first argument is a pointer to the destination, the second is a pointer to the source. The third argument is an integer argument -specifying the number of bytes to copy, the fourth argument is the -alignment of the source and destination locations, and the fifth is a +specifying the number of bytes to copy, and the fourth is a boolean indicating a volatile access. -If the call to this intrinsic has an alignment value that is not 0 or 1, -then the caller guarantees that the source and destination pointers are -aligned to that boundary. +The :ref:`align <attr_align>` parameter attribute can be provided +for the first and second arguments. 
If the ``isvolatile`` parameter is ``true``, the ``llvm.memmove`` call is a :ref:`volatile operation `. The detailed access behavior is @@ -10436,6 +10453,25 @@ aligned to some boundary, this can be specified as the fourth argument, otherwise it should be set to 0 or 1 (both meaning no alignment). +History +""""""" + +In LLVM 6.0 and earlier, the prototype for ``llvm.memmove`` included an +argument for the alignment of the pointer arguments, rather than using +the align parameter attribute: + +:: + + declare void @llvm.memmove.p0i8.p0i8.i32(i8* , i8* , + i32 , i32 , i1 ) + declare void @llvm.memmove.p0i8.p0i8.i64(i8* , i8* , + i64 , i32 , i1 ) + +This fourth argument is the alignment of the source and destination locations. +If the call to this intrinsic has an alignment value that is not 0 or 1, +then the caller guarantees that both the source and destination pointers +are aligned to that boundary. + .. _int_memset: '``llvm.memset.*``' Intrinsics @@ -10451,9 +10487,9 @@ :: declare void @llvm.memset.p0i8.i32(i8* , i8 , - i32 , i32 , i1 ) + i32 , i1 ) declare void @llvm.memset.p0i8.i64(i8* , i8 , - i64 , i32 , i1 ) + i64 , i1 ) Overview: """"""""" @@ -10462,8 +10498,8 @@ particular byte value. Note that, unlike the standard libc function, the ``llvm.memset`` -intrinsic does not return a value and takes extra alignment/volatile -arguments. Also, the destination can be in an arbitrary address space. +intrinsic does not return a value and takes an extra volatile +argument. Also, the destination can be in an arbitrary address space. Arguments: """""""""" @@ -10471,11 +10507,10 @@ The first argument is a pointer to the destination to fill, the second is the byte value with which to fill it, the third argument is an integer argument specifying the number of bytes to fill, and the fourth -argument is the known alignment of the destination location. +is a boolean indicating a volatile access. 
-If the call to this intrinsic has an alignment value that is not 0 or 1, -then the caller guarantees that the destination pointer is aligned to -that boundary. +The :ref:`align <attr_align>` parameter attribute can be provided +for the first argument. If the ``isvolatile`` parameter is ``true``, the ``llvm.memset`` call is a :ref:`volatile operation `. The detailed access behavior is not @@ -10485,9 +10520,26 @@ """""""""" The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting -at the destination location. If the argument is known to be aligned to -some boundary, this can be specified as the fourth argument, otherwise -it should be set to 0 or 1 (both meaning no alignment). +at the destination location. + +History +""""""" + +In LLVM 6.0 and earlier, the prototype for ``llvm.memset`` included an +argument for the alignment of the pointer argument, rather than using +the align parameter attribute: + +:: + + declare void @llvm.memset.p0i8.i32(i8* , i8 , + i32 , i32 , i1 ) + declare void @llvm.memset.p0i8.i64(i8* , i8 , + i64 , i32 , i1 ) + +This fourth argument is the alignment of the destination location. +If the call to this intrinsic has an alignment value that is not 0 or 1, +then the caller guarantees that the destination pointer is aligned to +that boundary. '``llvm.sqrt.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: include/llvm/IR/IRBuilder.h =================================================================== --- include/llvm/IR/IRBuilder.h +++ include/llvm/IR/IRBuilder.h @@ -402,15 +402,15 @@ /// If the pointer isn't an i8*, it will be converted. If a TBAA tag is /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. 
- CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align, + CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned DstAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { - return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, + return CreateMemSet(Ptr, Val, getInt64(Size), DstAlign, isVolatile, TBAATag, ScopeTag, NoAliasTag); } - CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, + CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned DstAlign, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); @@ -420,16 +420,19 @@ /// If the pointers aren't i8*, they will be converted. If a TBAA tag is /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. - CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align, + CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, + unsigned SrcAlign, uint64_t Size, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { - return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag, - TBAAStructTag, ScopeTag, NoAliasTag); + return CreateMemCpy(Dst, DstAlign, Src, SrcAlign, getInt64(Size), + isVolatile, TBAATag, TBAAStructTag, ScopeTag, + NoAliasTag); } - CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, + CallInst *CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, + unsigned SrcAlign, Value *Size, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr, @@ -465,15 +468,17 @@ /// If the pointers aren't i8*, they will be converted. If a TBAA tag is /// specified, it will be added to the instruction. Likewise with alias.scope /// and noalias tags. 
- CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align, + CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, + uint64_t Size, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr) { - return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, + return CreateMemMove(Dst, DstAlign, Src, SrcAlign, getInt64(Size), isVolatile, TBAATag, ScopeTag, NoAliasTag); } - CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, + CallInst *CreateMemMove(Value *Dst, unsigned DstAlign, Value *Src, unsigned SrcAlign, + Value *Size, bool isVolatile = false, MDNode *TBAATag = nullptr, MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); Index: include/llvm/IR/IntrinsicInst.h =================================================================== --- include/llvm/IR/IntrinsicInst.h +++ include/llvm/IR/IntrinsicInst.h @@ -243,6 +243,10 @@ return cast(getRawDest()->getType())->getAddressSpace(); } + unsigned getDestAlignment() const { + return getParamAlignment(ARG_DEST); + } + /// Set the specified arguments of the instruction. 
void setDest(Value *Ptr) { assert(getRawDest()->getType() == Ptr->getType() && @@ -250,6 +254,13 @@ setArgOperand(ARG_DEST, Ptr); } + void setDestAlignment(unsigned Align) { + removeParamAttr(ARG_DEST, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_DEST, + Attribute::getWithAlignment(getContext(), Align)); + } + void setLength(Value *L) { assert(getLength()->getType() == L->getType() && "setLength called with value of wrong type!"); @@ -347,12 +358,22 @@ return cast(getRawSource()->getType())->getAddressSpace(); } + unsigned getSourceAlignment() const { + return getParamAlignment(ARG_SOURCE); + } + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); setArgOperand(ARG_SOURCE, Ptr); } + void setSourceAlignment(unsigned Align) { + removeParamAttr(ARG_SOURCE, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(getContext(), Align)); + } + static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::memcpy_element_unordered_atomic: @@ -394,17 +415,9 @@ /// This is the common base class for memset/memcpy/memmove. 
class MemIntrinsic : public MemIntrinsicBase { private: - enum { ARG_ALIGN = 3, ARG_VOLATILE = 4 }; + enum { ARG_VOLATILE = 3 }; public: - ConstantInt *getAlignmentCst() const { - return cast(const_cast(getArgOperand(ARG_ALIGN))); - } - - unsigned getAlignment() const { - return getAlignmentCst()->getZExtValue(); - } - ConstantInt *getVolatileCst() const { return cast( const_cast(getArgOperand(ARG_VOLATILE))); @@ -414,14 +427,8 @@ return !getVolatileCst()->isZero(); } - void setAlignment(Constant *A) { setArgOperand(ARG_ALIGN, A); } - void setVolatile(Constant *V) { setArgOperand(ARG_VOLATILE, V); } - Type *getAlignmentType() const { - return getArgOperand(ARG_ALIGN)->getType(); - } - // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { @@ -462,11 +469,13 @@ /// This class wraps the llvm.memcpy/memmove intrinsics. class MemTransferInst : public MemIntrinsic { + private: + enum { ARG_SOURCE = 1 }; public: /// Return the arguments to the instruction. - Value *getRawSource() const { return const_cast(getArgOperand(1)); } - const Use &getRawSourceUse() const { return getArgOperandUse(1); } - Use &getRawSourceUse() { return getArgOperandUse(1); } + Value *getRawSource() const { return const_cast(getArgOperand(ARG_SOURCE)); } + const Use &getRawSourceUse() const { return getArgOperandUse(ARG_SOURCE); } + Use &getRawSourceUse() { return getArgOperandUse(ARG_SOURCE); } /// This is just like getRawSource, but it strips off any cast /// instructions that feed it, giving the original input. 
The returned @@ -477,12 +486,22 @@ return cast(getRawSource()->getType())->getAddressSpace(); } + unsigned getSourceAlignment() const { + return getParamAlignment(ARG_SOURCE); + } + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); setArgOperand(1, Ptr); } + void setSourceAlignment(unsigned Align) { + removeParamAttr(ARG_SOURCE, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(getContext(), Align)); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { return I->getIntrinsicID() == Intrinsic::memcpy || @@ -606,12 +625,22 @@ return cast(getRawSource()->getType())->getAddressSpace(); } + unsigned getSourceAlignment() const { + return getParamAlignment(ARG_SOURCE); + } + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); setArgOperand(ARG_SOURCE, Ptr); } + void setSourceAlignment(unsigned Align) { + removeParamAttr(ARG_SOURCE, Attribute::Alignment); + if (Align > 0) + addParamAttr(ARG_SOURCE, Attribute::getWithAlignment(getContext(), Align)); + } + static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::memcpy: Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -390,17 +390,17 @@ def int_memcpy : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>]>; def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, - llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, ReadOnly<1>]>; def int_memset : Intrinsic<[], [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, - 
llvm_i32_ty, llvm_i1_ty], + llvm_i1_ty], [IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>; // FIXME: Add version of these floating point intrinsics which allow non-default Index: lib/Analysis/Lint.cpp =================================================================== --- lib/Analysis/Lint.cpp +++ lib/Analysis/Lint.cpp @@ -323,9 +323,9 @@ MemCpyInst *MCI = cast(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MCI->getDest(), MemoryLocation::UnknownSize, - MCI->getAlignment(), nullptr, MemRef::Write); + MCI->getDestAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MCI->getSource(), MemoryLocation::UnknownSize, - MCI->getAlignment(), nullptr, MemRef::Read); + MCI->getSourceAlignment(), nullptr, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API // isn't expressive enough for what we really want to do. Known partial @@ -345,16 +345,16 @@ MemMoveInst *MMI = cast(&I); // TODO: If the size is known, use it. visitMemoryReference(I, MMI->getDest(), MemoryLocation::UnknownSize, - MMI->getAlignment(), nullptr, MemRef::Write); + MMI->getDestAlignment(), nullptr, MemRef::Write); visitMemoryReference(I, MMI->getSource(), MemoryLocation::UnknownSize, - MMI->getAlignment(), nullptr, MemRef::Read); + MMI->getSourceAlignment(), nullptr, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast(&I); // TODO: If the size is known, use it. 
visitMemoryReference(I, MSI->getDest(), MemoryLocation::UnknownSize, - MSI->getAlignment(), nullptr, MemRef::Write); + MSI->getDestAlignment(), nullptr, MemRef::Write); break; } Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -1606,11 +1606,14 @@ // If this is a memcpy (or similar) then we may be able to improve the // alignment if (MemIntrinsic *MI = dyn_cast(CI)) { - unsigned Align = getKnownAlignment(MI->getDest(), *DL); - if (MemTransferInst *MTI = dyn_cast(MI)) - Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); - if (Align > MI->getAlignment()) - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL); + if (DestAlign > MI->getDestAlignment()) + MI->setDestAlignment(DestAlign); + if (MemTransferInst *MTI = dyn_cast(MI)) { + unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL); + if (SrcAlign > MTI->getSourceAlignment()) + MTI->setSourceAlignment(SrcAlign); + } } } Index: lib/CodeGen/SafeStack.cpp =================================================================== --- lib/CodeGen/SafeStack.cpp +++ lib/CodeGen/SafeStack.cpp @@ -545,6 +545,7 @@ for (Argument *Arg : ByValArguments) { unsigned Offset = SSL.getObjectOffset(Arg); + unsigned Align = SSL.getObjectAlignment(Arg); Type *Ty = Arg->getType()->getPointerElementType(); uint64_t Size = DL.getTypeStoreSize(Ty); @@ -561,7 +562,9 @@ DIExpression::NoDeref, -Offset, DIExpression::NoDeref); Arg->replaceAllUsesWith(NewArg); IRB.SetInsertPoint(cast(NewArg)->getNextNode()); - IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); + // FIXME: Can we provide a better alignment than the conservative value + // one for the destination of the memcpy? + IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size); } // Allocate space for every unsafe static AllocaInst on the unsafe stack. 
Index: lib/CodeGen/SafeStackLayout.h =================================================================== --- lib/CodeGen/SafeStackLayout.h +++ lib/CodeGen/SafeStackLayout.h @@ -47,6 +47,7 @@ SmallVector StackObjects; DenseMap ObjectOffsets; + DenseMap ObjectAlignments; void layoutObject(StackObject &Obj); @@ -64,6 +65,9 @@ /// Returns the offset to the object start in the stack frame. unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; } + /// Returns the alignment of the object start in the stack frame. + unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; } + /// Returns the size of the entire frame. unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; } Index: lib/CodeGen/SafeStackLayout.cpp =================================================================== --- lib/CodeGen/SafeStackLayout.cpp +++ lib/CodeGen/SafeStackLayout.cpp @@ -42,6 +42,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment, const StackColoring::LiveRange &Range) { StackObjects.push_back({V, Size, Alignment, Range}); + ObjectAlignments[V] = Alignment; MaxAlignment = std::max(MaxAlignment, Alignment); } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5000,14 +5000,19 @@ case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + const auto &MCI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + unsigned DestAlign = MCI.getDestAlignment(); + unsigned SrcAlign = MCI.getSourceAlignment(); + unsigned Align = std::min(DestAlign, SrcAlign); if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. 
- bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), @@ -5016,28 +5021,34 @@ return nullptr; } case Intrinsic::memset: { + const auto &MSI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); - if (!Align) - Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. - bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + unsigned DestAlign = MSI.getDestAlignment(); + if (!DestAlign) + DestAlign = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. + bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); - SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, DestAlign, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return nullptr; } case Intrinsic::memmove: { + const auto &MMI = cast(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + unsigned DestAlign = MMI.getDestAlignment(); + unsigned SrcAlign = MMI.getSourceAlignment(); + unsigned Align = std::min(DestAlign, SrcAlign); if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 
- bool isVol = cast(I.getArgOperand(4))->getZExtValue(); + bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memmove DAG + // node. SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -543,17 +543,17 @@ AttributeSet AttributeSet::removeAttribute(LLVMContext &C, Attribute::AttrKind Kind) const { if (!hasAttribute(Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return removeAttributes(C, B); + AttrBuilder B(*this); + B.removeAttribute(Kind); + return get(C, B); } AttributeSet AttributeSet::removeAttribute(LLVMContext &C, StringRef Kind) const { if (!hasAttribute(Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return removeAttributes(C, B); + AttrBuilder B(*this); + B.removeAttribute(Kind); + return get(C, B); } AttributeSet AttributeSet::removeAttributes(LLVMContext &C, @@ -1098,17 +1098,37 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const { if (!hasAttribute(Index, Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return removeAttributes(C, Index, B); + if (!pImpl) + return AttributeList(); + + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); + + AttrBuilder B(AttrSets[Index]); + B.removeAttribute(Kind); + AttrSets[Index] = AttributeSet::get(C, B); + + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, StringRef Kind) const { if (!hasAttribute(Index, Kind)) return *this; - AttrBuilder B; - B.addAttribute(Kind); - return 
removeAttributes(C, Index, B); + if (!pImpl) + return AttributeList(); + + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); + + AttrBuilder B(AttrSets[Index]); + B.removeAttribute(Kind); + AttrSets[Index] = AttributeSet::get(C, B); + + return getImpl(C, AttrSets); } AttributeList @@ -1117,10 +1137,6 @@ if (!pImpl) return AttributeList(); - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. - assert(!AttrsToRemove.hasAlignmentAttr() && "Attempt to change alignment!"); - Index = attrIdxToArrayIdx(Index); SmallVector AttrSets(this->begin(), this->end()); if (Index >= AttrSets.size()) Index: lib/IR/AutoUpgrade.cpp =================================================================== --- lib/IR/AutoUpgrade.cpp +++ lib/IR/AutoUpgrade.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" @@ -518,6 +519,34 @@ return true; } } + // Updating the memory intrinsics (memcpy/memmove/memset) that have an + // alignment parameter to embedding the alignment as an attribute of + // the pointer args. 
+ if (Name.startswith("memcpy.") && F->arg_size() == 5) { + F->setName(Name + ".old"); + // Get the types of dest, src, and len + ArrayRef ParamTypes = F->getFunctionType()->params().slice(0,3); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy, ParamTypes); + return true; + } + if (Name.startswith("memmove.") && F->arg_size() == 5) { + F->setName(Name + ".old"); + // Get the types of dest, src, and len + ArrayRef ParamTypes = F->getFunctionType()->params().slice(0,3); + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove, ParamTypes); + return true; + } + if (Name.startswith("memset.") && F->arg_size() == 5) { + F->setName(Name + ".old"); + // Get the types of dest, and len + const auto *FT = F->getFunctionType(); + Type *ParamTypes[2] = { + FT->getParamType(0), // Dest + FT->getParamType(2) // len + }; + NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset, ParamTypes); + return true; + } break; } case 'n': { @@ -2171,14 +2200,17 @@ return; } - CallInst *NewCall = nullptr; - switch (NewFn->getIntrinsicID()) { - default: { + const auto &DefaultCase = [&NewFn,&CI]() -> void { // Handle generic mangling change, but nothing else assert( (CI->getCalledFunction()->getName() != NewFn->getName()) && "Unknown function for CallInst upgrade and isn't just a name change"); CI->setCalledFunction(NewFn); + }; + CallInst *NewCall = nullptr; + switch (NewFn->getIntrinsicID()) { + default: { + DefaultCase(); return; } @@ -2319,6 +2351,35 @@ NewCall = Builder.CreateCall(NewFn, Args); break; } + + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::memset: { + // We have to make sure that the call signature is what we're expecting. 
+ // We only want to change the old signatures by removing the alignment arg: + // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) + // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) + // @llvm.memset...(i8*, i8, i[32|64], i32, i1) + // -> @llvm.memset...(i8*, i8, i[32|64], i1) + // Note: i8*'s in the above can be any pointer type + if (CI->getNumArgOperands() != 5) { + DefaultCase(); + return; + } + // Remove alignment argument (3), and add alignment attributes to the + // dest/src pointers. + Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), CI->getArgOperand(4)}; + NewCall = Builder.CreateCall(NewFn, Args); + auto *MemCI = cast(NewCall); + // All mem intrinsics support dest alignment. + const ConstantInt *Align = cast(CI->getArgOperand(3)); + MemCI->setDestAlignment(Align->getZExtValue()); + // Memcpy/Memmove also support source alignment. + if (auto *MTI = dyn_cast(MemCI)) + MTI->setSourceAlignment(Align->getZExtValue()); + break; + } } assert(NewCall && "Should have either set this variable or returned through " "the default case"); Index: lib/IR/IRBuilder.cpp =================================================================== --- lib/IR/IRBuilder.cpp +++ lib/IR/IRBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" using namespace llvm; @@ -78,18 +79,21 @@ return II; } -CallInst *IRBuilderBase:: -CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align, - bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, - MDNode *NoAliasTag) { +CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size, + unsigned DstAlign, bool isVolatile, + MDNode *TBAATag, MDNode *ScopeTag, + MDNode *NoAliasTag) { Ptr = getCastedInt8PtrValue(Ptr); - Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) }; + Value *Ops[] = { Ptr, 
Val, Size, getInt1(isVolatile) }; Type *Tys[] = { Ptr->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + if (DstAlign > 0) + cast(CI)->setDestAlignment(DstAlign); + // Set the TBAA info if present. if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); @@ -99,24 +103,33 @@ if (NoAliasTag) CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); - + return CI; } -CallInst *IRBuilderBase:: -CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align, - bool isVolatile, MDNode *TBAATag, MDNode *TBAAStructTag, - MDNode *ScopeTag, MDNode *NoAliasTag) { +CallInst *IRBuilderBase::CreateMemCpy(Value *Dst, unsigned DstAlign, Value *Src, + unsigned SrcAlign, Value *Size, + bool isVolatile, MDNode *TBAATag, + MDNode *TBAAStructTag, MDNode *ScopeTag, + MDNode *NoAliasTag) { + assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) && "Must be 0 or a power of 2"); + assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) && "Must be 0 or a power of 2"); Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); - Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) }; + Value *Ops[] = { Dst, Src, Size, getInt1(isVolatile) }; Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + auto *MCI = cast(CI); + if (DstAlign > 0) + MCI->setDestAlignment(DstAlign); + if (SrcAlign > 0) + MCI->setSourceAlignment(SrcAlign); + // Set the TBAA info if present. if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); @@ -130,7 +143,7 @@ if (NoAliasTag) CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); - + return CI; } @@ -154,8 +167,9 @@ CallInst *CI = createCallHelper(TheFn, Ops, this); // Set the alignment of the pointer args. 
- CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), DstAlign)); - CI->addParamAttr(1, Attribute::getWithAlignment(CI->getContext(), SrcAlign)); + auto *AMCI = cast(CI); + AMCI->setDestAlignment(DstAlign); + AMCI->setSourceAlignment(SrcAlign); // Set the TBAA info if present. if (TBAATag) @@ -174,20 +188,29 @@ return CI; } -CallInst *IRBuilderBase:: -CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align, - bool isVolatile, MDNode *TBAATag, MDNode *ScopeTag, - MDNode *NoAliasTag) { +CallInst *IRBuilderBase::CreateMemMove(Value *Dst, unsigned DstAlign, + Value *Src, unsigned SrcAlign, + Value *Size, bool isVolatile, + MDNode *TBAATag, MDNode *ScopeTag, + MDNode *NoAliasTag) { + assert((DstAlign == 0 || isPowerOf2_32(DstAlign)) && "Must be 0 or a power of 2"); + assert((SrcAlign == 0 || isPowerOf2_32(SrcAlign)) && "Must be 0 or a power of 2"); Dst = getCastedInt8PtrValue(Dst); Src = getCastedInt8PtrValue(Src); - Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) }; + Value *Ops[] = { Dst, Src, Size, getInt1(isVolatile) }; Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() }; Module *M = BB->getParent()->getParent(); Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys); CallInst *CI = createCallHelper(TheFn, Ops, this); + auto *MMI = cast(CI); + if (DstAlign > 0) + MMI->setDestAlignment(DstAlign); + if (SrcAlign > 0) + MMI->setSourceAlignment(SrcAlign); + // Set the TBAA info if present. 
if (TBAATag) CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -4017,14 +4017,18 @@ case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: { - ConstantInt *AlignCI = dyn_cast(CS.getArgOperand(3)); - Assert(AlignCI, - "alignment argument of memory intrinsics must be a constant int", + const auto *MI = cast(CS.getInstruction()); + auto IsValidAlignment = [&](unsigned Alignment) -> bool { + return Alignment == 0 || isPowerOf2_32(Alignment); + }; + Assert(IsValidAlignment(MI->getDestAlignment()), + "alignment of arg 0 of memory intrinsic must be 0 or a power of 2", CS); - const APInt &AlignVal = AlignCI->getValue(); - Assert(AlignCI->isZero() || AlignVal.isPowerOf2(), - "alignment argument of memory intrinsics must be a power of 2", CS); - Assert(isa(CS.getArgOperand(4)), + if (const auto *MTI = dyn_cast(MI)) + Assert(IsValidAlignment(MTI->getSourceAlignment()), + "alignment of arg 1 of memory intrinsic must be 0 or a power of 2", + CS); + Assert(isa(CS.getArgOperand(3)), "isvolatile argument of memory intrinsics must be a constant int", CS); break; Index: lib/Target/AArch64/AArch64FastISel.cpp =================================================================== --- lib/Target/AArch64/AArch64FastISel.cpp +++ lib/Target/AArch64/AArch64FastISel.cpp @@ -3456,7 +3456,8 @@ // Small memcpy's are common enough that we want to do them without a call // if possible. uint64_t Len = cast(MTI->getLength())->getZExtValue(); - unsigned Alignment = MTI->getAlignment(); + unsigned Alignment = std::min(MTI->getDestAlignment(), + MTI->getSourceAlignment()); if (isMemCpySmall(Len, Alignment)) { Address Dest, Src; if (!computeAddress(MTI->getRawDest(), Dest) || @@ -3476,7 +3477,7 @@ return false; const char *IntrMemName = isa(II) ? 
"memcpy" : "memmove"; - return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -3492,7 +3493,7 @@ // address spaces. return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } case Intrinsic::sin: case Intrinsic::cos: Index: lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -843,24 +843,24 @@ continue; case Intrinsic::memcpy: { MemCpyInst *MemCpy = cast(Intr); - Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(), - MemCpy->getLength(), MemCpy->getAlignment(), - MemCpy->isVolatile()); + Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getDestAlignment(), + MemCpy->getRawSource(), MemCpy->getSourceAlignment(), + MemCpy->getLength(), MemCpy->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memmove: { MemMoveInst *MemMove = cast(Intr); - Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getRawSource(), - MemMove->getLength(), MemMove->getAlignment(), - MemMove->isVolatile()); + Builder.CreateMemMove(MemMove->getRawDest(), MemMove->getDestAlignment(), + MemMove->getRawSource(), MemMove->getSourceAlignment(), + MemMove->getLength(), MemMove->isVolatile()); Intr->eraseFromParent(); continue; } case Intrinsic::memset: { MemSetInst *MemSet = cast(Intr); Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), - MemSet->getLength(), MemSet->getAlignment(), + MemSet->getLength(), MemSet->getDestAlignment(), MemSet->isVolatile()); Intr->eraseFromParent(); continue; Index: lib/Target/ARM/ARMFastISel.cpp =================================================================== --- lib/Target/ARM/ARMFastISel.cpp +++ lib/Target/ARM/ARMFastISel.cpp @@ -2352,8 
+2352,8 @@ for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { // If we're lowering a memory intrinsic instead of a regular call, skip the - // last two arguments, which shouldn't be passed to the underlying function. - if (IntrMemName && e-i <= 2) + // last argument, which shouldn't be passed to the underlying function. + if (IntrMemName && e-i <= 1) break; ISD::ArgFlagsTy Flags; @@ -2546,7 +2546,8 @@ if (!ARMComputeAddress(MTI.getRawDest(), Dest) || !ARMComputeAddress(MTI.getRawSource(), Src)) return false; - unsigned Alignment = MTI.getAlignment(); + unsigned Alignment = std::min(MTI.getDestAlignment(), + MTI.getSourceAlignment()); if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment)) return true; } Index: lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp =================================================================== --- lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -2080,7 +2080,6 @@ // pointer size if it isn't already. LLVMContext &Ctx = SI->getContext(); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); - unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment()); DebugLoc DLoc = SI->getDebugLoc(); const SCEV *NumBytesS = @@ -2214,12 +2213,14 @@ : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy); NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords}); } else { - NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr, - NumBytes, Alignment); + NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(), + LoadBasePtr, LI->getAlignment(), + NumBytes); } } else { - NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, - NumBytes, Alignment); + NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(), + LoadBasePtr, LI->getAlignment(), + NumBytes); // Okay, the memcpy has been formed. Zap the original store and // anything that feeds into it. 
RecursivelyDeleteTriviallyDeadInstructions(SI, TLI); Index: lib/Target/Mips/MipsFastISel.cpp =================================================================== --- lib/Target/Mips/MipsFastISel.cpp +++ lib/Target/Mips/MipsFastISel.cpp @@ -1628,7 +1628,7 @@ if (!MTI->getLength()->getType()->isIntegerTy(32)) return false; const char *IntrMemName = isa(II) ? "memcpy" : "memmove"; - return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 2); + return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -1637,7 +1637,7 @@ return false; if (!MSI->getLength()->getType()->isIntegerTy(32)) return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } } return false; Index: lib/Target/X86/X86FastISel.cpp =================================================================== --- lib/Target/X86/X86FastISel.cpp +++ lib/Target/X86/X86FastISel.cpp @@ -2726,7 +2726,7 @@ if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255) return false; - return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1); } case Intrinsic::memset: { const MemSetInst *MSI = cast(II); @@ -2741,7 +2741,7 @@ if (MSI->getDestAddressSpace() > 255) return false; - return lowerCallTo(II, "memset", II->getNumArgOperands() - 2); + return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); } case Intrinsic::stackprotector: { // Emit code to store the stack guard onto the stack. 
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -183,17 +183,23 @@ return AMI; } -Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { +Instruction *InstCombiner::SimplifyMemTransfer(MemTransferInst *MI) { unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT); - unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT); - unsigned MinAlign = std::min(DstAlign, SrcAlign); - unsigned CopyAlign = MI->getAlignment(); + unsigned CopyDestAlign = MI->getDestAlignment(); - if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false)); + if (CopyDestAlign < DstAlign) { + MI->setDestAlignment(DstAlign); return MI; } + unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT); + unsigned CopySourceAlign = MI->getSourceAlignment(); + + if (CopySourceAlign < SrcAlign) { + MI->setSourceAlignment(SrcAlign); + return MI; + } + // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. ConstantInt *MemOpLength = dyn_cast(MI->getArgOperand(2)); @@ -236,8 +242,8 @@ // If the memcpy/memmove provides better alignment info than we can // infer, use it. 
- SrcAlign = std::max(SrcAlign, CopyAlign); - DstAlign = std::max(DstAlign, CopyAlign); + SrcAlign = std::max(SrcAlign, CopySourceAlign); + DstAlign = std::max(DstAlign, CopyDestAlign); Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); @@ -264,9 +270,8 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT); - if (MI->getAlignment() < Alignment) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - Alignment, false)); + if (MI->getDestAlignment() < Alignment) { + MI->setDestAlignment(Alignment); return MI; } @@ -276,7 +281,7 @@ if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8)) return nullptr; uint64_t Len = LenC->getLimitedValue(); - Alignment = MI->getAlignment(); + Alignment = MI->getDestAlignment(); assert(Len && "0-sized memory setting should be removed already."); // memset(s,c,n) -> store s, c (for n=1,2,4,8) @@ -1864,8 +1869,8 @@ // If we can determine a pointer alignment that is bigger than currently // set, update the alignment. 
- if (isa(MI)) { - if (Instruction *I = SimplifyMemTransfer(MI)) + if (auto *MTI = dyn_cast(MI)) { + if (Instruction *I = SimplifyMemTransfer(MTI)) return I; } else if (MemSetInst *MSI = dyn_cast(MI)) { if (Instruction *I = SimplifyMemSet(MSI)) Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -790,7 +790,7 @@ bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyElementUnorderedAtomicMemCpy(AtomicMemCpyInst *AMI); - Instruction *SimplifyMemTransfer(MemIntrinsic *MI); + Instruction *SimplifyMemTransfer(MemTransferInst *MI); Instruction *SimplifyMemSet(MemSetInst *MI); Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned); Index: lib/Transforms/Instrumentation/AddressSanitizer.cpp =================================================================== --- lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2710,7 +2710,8 @@ Arg.replaceAllUsesWith(AI); uint64_t AllocSize = DL.getTypeAllocSize(Ty); - IRB.CreateMemCpy(AI, &Arg, AllocSize, Align); + // FIXME: Can we do better for the destination alignment? 
+ IRB.CreateMemCpy(AI, 1, &Arg, Align, AllocSize); } } } Index: lib/Transforms/Instrumentation/DataFlowSanitizer.cpp =================================================================== --- lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1382,20 +1382,20 @@ Value *LenShadow = IRB.CreateMul( I.getLength(), ConstantInt::get(I.getLength()->getType(), DFSF.DFS.ShadowWidth / 8)); - Value *AlignShadow; - if (ClPreserveAlignment) { - AlignShadow = IRB.CreateMul(I.getAlignmentCst(), - ConstantInt::get(I.getAlignmentCst()->getType(), - DFSF.DFS.ShadowWidth / 8)); - } else { - AlignShadow = ConstantInt::get(I.getAlignmentCst()->getType(), - DFSF.DFS.ShadowWidth / 8); - } Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx); DestShadow = IRB.CreateBitCast(DestShadow, Int8Ptr); SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr); - IRB.CreateCall(I.getCalledValue(), {DestShadow, SrcShadow, LenShadow, - AlignShadow, I.getVolatileCst()}); + auto *MTI = cast( + IRB.CreateCall(I.getCalledValue(), + {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()})); + if (ClPreserveAlignment) { + MTI->setDestAlignment(I.getDestAlignment() * (DFSF.DFS.ShadowWidth / 8)); + MTI->setSourceAlignment(I.getSourceAlignment() * + (DFSF.DFS.ShadowWidth / 8)); + } else { + MTI->setDestAlignment(DFSF.DFS.ShadowWidth / 8); + MTI->setSourceAlignment(DFSF.DFS.ShadowWidth / 8); + } } void DFSanVisitor::visitReturnInst(ReturnInst &RI) { Index: lib/Transforms/Instrumentation/MemorySanitizer.cpp =================================================================== --- lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -1246,8 +1246,8 @@ Size, ArgAlign); } else { unsigned CopyAlign = std::min(ArgAlign, kShadowTLSAlignment); - Value *Cpy = - EntryIRB.CreateMemCpy(CpShadowPtr, Base, Size, CopyAlign); + Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base, + CopyAlign, Size); DEBUG(dbgs() 
<< " ByValCpy: " << *Cpy << "\n"); (void)Cpy; } @@ -2768,7 +2768,7 @@ Value *AShadowPtr = getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment).first; - Store = IRB.CreateMemCpy(ArgShadowBase, AShadowPtr, Size, Alignment); + Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr, Alignment, Size); } else { Size = DL.getTypeAllocSize(A->getType()); if (ArgOffset + Size > kParamTLSSize) break; @@ -3119,7 +3119,8 @@ std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr( A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment); - IRB.CreateMemCpy(ShadowBase, ShadowPtr, ArgSize, kShadowTLSAlignment); + IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr, + kShadowTLSAlignment, ArgSize); } else { ArgKind AK = classifyArgument(A); if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset) @@ -3206,7 +3207,7 @@ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), VAArgOverflowSize); VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); - IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize); } // Instrument va_start. 
@@ -3226,8 +3227,8 @@ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment); - IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, AMD64FpEndOffset, - Alignment); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, + AMD64FpEndOffset); Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr( IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy), ConstantInt::get(MS.IntptrTy, 8)), @@ -3239,8 +3240,8 @@ Alignment); Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy, AMD64FpEndOffset); - IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, - Alignment); + IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment, + VAArgOverflowSize); } } }; @@ -3331,7 +3332,7 @@ // If there is a va_start in this function, make a backup copy of // va_arg_tls somewhere in the function entry block. VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); - IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize); } // Instrument va_start. 
@@ -3349,7 +3350,7 @@ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment); - IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, Alignment); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, CopySize); } } }; @@ -3512,7 +3513,7 @@ IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), VAArgOverflowSize); VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); - IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize); } Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize); @@ -3570,7 +3571,7 @@ GrRegSaveAreaShadowPtrOff); Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff); - IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, GrSrcPtr, GrCopySize, 8); + IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, 8, GrSrcPtr, 8, GrCopySize); // Again, but for FP/SIMD values. Value *VrRegSaveAreaShadowPtrOff = @@ -3588,7 +3589,7 @@ VrRegSaveAreaShadowPtrOff); Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff); - IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, VrSrcPtr, VrCopySize, 8); + IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, 8, VrSrcPtr, 8, VrCopySize); // And finally for remaining arguments. 
Value *StackSaveAreaShadowPtr = @@ -3600,8 +3601,8 @@ IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy, IRB.getInt32(AArch64VAEndOffset)); - IRB.CreateMemCpy(StackSaveAreaShadowPtr, StackSrcPtr, - VAArgOverflowSize, 16); + IRB.CreateMemCpy(StackSaveAreaShadowPtr, 16, StackSrcPtr, 16, + VAArgOverflowSize); } } }; @@ -3659,7 +3660,8 @@ std::tie(AShadowPtr, AOriginPtr) = MSV.getShadowOriginPtr( A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment); - IRB.CreateMemCpy(Base, AShadowPtr, ArgSize, kShadowTLSAlignment); + IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr, + kShadowTLSAlignment, ArgSize); } VAArgOffset += alignTo(ArgSize, 8); } else { @@ -3750,7 +3752,7 @@ // If there is a va_start in this function, make a backup copy of // va_arg_tls somewhere in the function entry block. VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize); - IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8); + IRB.CreateMemCpy(VAArgTLSCopy, 8, MS.VAArgTLS, 8, CopySize); } // Instrument va_start. @@ -3768,7 +3770,8 @@ std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) = MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment); - IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy, CopySize, Alignment); + IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment, + CopySize); } } }; Index: lib/Transforms/Scalar/AlignmentFromAssumptions.cpp =================================================================== --- lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -344,6 +344,8 @@ // instruction, but only for one operand, save it. If we reach the // other operand through another assumption later, then we may // change the alignment at that point. + // FIXME: The above statement is no longer true. Fix the code below to + // be able to reason about different dest/src alignments. 
if (MemTransferInst *MTI = dyn_cast(MI)) { unsigned NewSrcAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MTI->getSource(), SE); @@ -373,20 +375,22 @@ if (AltSrcAlignment <= std::max(NewDestAlignment, AltDestAlignment)) NewAlignment = std::max(NewAlignment, AltSrcAlignment); - if (NewAlignment > MI->getAlignment()) { - MI->setAlignment(ConstantInt::get(Type::getInt32Ty( - MI->getParent()->getContext()), NewAlignment)); + if (NewAlignment > MTI->getDestAlignment()) { + MTI->setDestAlignment(NewAlignment); + ++NumMemIntAlignChanged; + } + if (NewAlignment > MTI->getSourceAlignment()) { + MTI->setSourceAlignment(NewAlignment); ++NumMemIntAlignChanged; } NewDestAlignments.insert(std::make_pair(MTI, NewDestAlignment)); NewSrcAlignments.insert(std::make_pair(MTI, NewSrcAlignment)); - } else if (NewDestAlignment > MI->getAlignment()) { + } else if (NewDestAlignment > MI->getDestAlignment()) { assert((!isa(MI) || isa(MI)) && "Unknown memory intrinsic"); - MI->setAlignment(ConstantInt::get(Type::getInt32Ty( - MI->getParent()->getContext()), NewDestAlignment)); + MI->setDestAlignment(NewDestAlignment); ++NumMemIntAlignChanged; } } Index: lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- lib/Transforms/Scalar/DeadStoreElimination.cpp +++ lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -882,7 +882,7 @@ // as any store/memset/memcpy is likely using vector instructions so // shortening it to not vector size is likely to be slower MemIntrinsic *EarlierIntrinsic = cast(EarlierWrite); - unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment(); + unsigned EarlierWriteAlign = EarlierIntrinsic->getDestAlignment(); if (!IsOverwriteEnd) LaterOffset = int64_t(LaterOffset + LaterSize); Index: lib/Transforms/Scalar/InferAddressSpaces.cpp =================================================================== --- lib/Transforms/Scalar/InferAddressSpaces.cpp +++ lib/Transforms/Scalar/InferAddressSpaces.cpp @@ 
-779,7 +779,7 @@ if (auto *MSI = dyn_cast(MI)) { B.CreateMemSet(NewV, MSI->getValue(), - MSI->getLength(), MSI->getAlignment(), + MSI->getLength(), MSI->getDestAlignment(), false, // isVolatile TBAA, ScopeMD, NoAliasMD); } else if (auto *MTI = dyn_cast(MI)) { @@ -795,14 +795,14 @@ if (isa(MTI)) { MDNode *TBAAStruct = MTI->getMetadata(LLVMContext::MD_tbaa_struct); - B.CreateMemCpy(Dest, Src, MTI->getLength(), - MTI->getAlignment(), + B.CreateMemCpy(Dest, MTI->getDestAlignment(), Src, + MTI->getSourceAlignment(), MTI->getLength(), false, // isVolatile TBAA, TBAAStruct, ScopeMD, NoAliasMD); } else { assert(isa(MTI)); - B.CreateMemMove(Dest, Src, MTI->getLength(), - MTI->getAlignment(), + B.CreateMemMove(Dest, MTI->getDestAlignment(), Src, + MTI->getSourceAlignment(), MTI->getLength(), false, // isVolatile TBAA, ScopeMD, NoAliasMD); } Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -756,7 +756,7 @@ MSIs.insert(MSI); bool NegStride = SizeInBytes == -Stride; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), SplatValue, MSI, MSIs, Ev, + MSI->getDestAlignment(), SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true); } @@ -1037,16 +1037,18 @@ Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); - unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); CallInst *NewCall = nullptr; // Check whether to generate an unordered atomic memcpy: // If the load or store are atomic, then they must neccessarily be unordered // by previous checks. 
- if (!SI->isAtomic() && !LI->isAtomic()) - NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align); - else { + if (!SI->isAtomic() && !LI->isAtomic()) { + NewCall = + Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(), LoadBasePtr, + LI->getAlignment(), NumBytes); + } else { // We cannot allow unaligned ops for unordered load/store, so reject // anything where the alignment isn't at least the element size. + unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); if (Align < StoreSize) return false; Index: lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -263,7 +263,7 @@ void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { int64_t Size = cast(MSI->getLength())->getZExtValue(); - addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); + addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI); } void addRange(int64_t Start, int64_t Size, Value *Ptr, @@ -498,15 +498,24 @@ return AMemSet; } -static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, - const LoadInst *LI) { +static unsigned findStoreAlignment(const DataLayout &DL, const StoreInst *SI) { unsigned StoreAlign = SI->getAlignment(); if (!StoreAlign) StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType()); + return StoreAlign; +} + +static unsigned findLoadAlignment(const DataLayout &DL, const LoadInst *LI) { unsigned LoadAlign = LI->getAlignment(); if (!LoadAlign) LoadAlign = DL.getABITypeAlignment(LI->getType()); + return LoadAlign; +} +static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, + const LoadInst *LI) { + unsigned StoreAlign = findStoreAlignment(DL, SI); + unsigned LoadAlign = findLoadAlignment(DL, LI); return std::min(StoreAlign, LoadAlign); } @@ -656,19 +665,20 @@ if (!AA.isNoAlias(MemoryLocation::get(SI), 
LoadLoc)) UseMemMove = true; - unsigned Align = findCommonAlignment(DL, SI, LI); uint64_t Size = DL.getTypeStoreSize(T); IRBuilder<> Builder(P); Instruction *M; if (UseMemMove) - M = Builder.CreateMemMove(SI->getPointerOperand(), - LI->getPointerOperand(), Size, - Align, SI->isVolatile()); + M = Builder.CreateMemMove( + SI->getPointerOperand(), findStoreAlignment(DL, SI), + LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, + SI->isVolatile()); else - M = Builder.CreateMemCpy(SI->getPointerOperand(), - LI->getPointerOperand(), Size, - Align, SI->isVolatile()); + M = Builder.CreateMemCpy( + SI->getPointerOperand(), findStoreAlignment(DL, SI), + LI->getPointerOperand(), findLoadAlignment(DL, LI), Size, + SI->isVolatile()); DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => " << *M << "\n"); @@ -1047,20 +1057,17 @@ // If all checks passed, then we can transform M. - // Make sure to use the lesser of the alignment of the source and the dest - // since we're changing where we're reading from, but don't want to increase - // the alignment past what can be read from or written to. // TODO: Is this worth it if we're creating a less aligned memcpy? For // example we could be moving from movaps -> movq on x86. - unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); - IRBuilder<> Builder(M); if (UseMemMove) - Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + Builder.CreateMemMove(M->getRawDest(), M->getDestAlignment(), + MDep->getRawSource(), MDep->getSourceAlignment(), + M->getLength(), M->isVolatile()); else - Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), - Align, M->isVolatile()); + Builder.CreateMemCpy(M->getRawDest(), M->getDestAlignment(), + MDep->getRawSource(), MDep->getSourceAlignment(), + M->getLength(), M->isVolatile()); // Remove the instruction we're replacing. 
MD->removeInstruction(M); @@ -1106,7 +1113,7 @@ // If Dest is aligned, and SrcSize is constant, use the minimum alignment // of the sum. const unsigned DestAlign = - std::max(MemSet->getAlignment(), MemCpy->getAlignment()); + std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment()); if (DestAlign > 1) if (ConstantInt *SrcSizeC = dyn_cast(SrcSize)) Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign); @@ -1166,7 +1173,7 @@ IRBuilder<> Builder(MemCpy); Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1), - CopySize, MemCpy->getAlignment()); + CopySize, MemCpy->getDestAlignment()); return true; } @@ -1192,7 +1199,7 @@ if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { IRBuilder<> Builder(M); Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(), - M->getAlignment(), false); + M->getDestAlignment(), false); MD->removeInstruction(M); M->eraseFromParent(); ++NumCpyToSet; @@ -1221,8 +1228,11 @@ // d) memcpy from a just-memset'd source can be turned into memset. if (DepInfo.isClobber()) { if (CallInst *C = dyn_cast(DepInfo.getInst())) { + // FIXME: Can we pass in either of dest/src alignment here instead of + // conservatively taking the minimum? + unsigned Align = std::min(M->getDestAlignment(), M->getSourceAlignment()); if (performCallSlotOptzn(M, M->getDest(), M->getSource(), - CopySize->getZExtValue(), M->getAlignment(), + CopySize->getZExtValue(), Align, C)) { MD->removeInstruction(M); M->eraseFromParent(); @@ -1337,7 +1347,10 @@ // source of the memcpy to the alignment we need. If we fail, we bail out. AssumptionCache &AC = LookupAssumptionCache(); DominatorTree &DT = LookupDomTree(); - if (MDep->getAlignment() < ByValAlign && + // FIXME: Can we use either of dest/src alignment here instead of conservatively + // taking the minimum? 
+ unsigned MinAlign = std::min(MDep->getDestAlignment(), MDep->getSourceAlignment()); + if (MinAlign < ByValAlign && getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, CS.getInstruction(), &AC, &DT) < ByValAlign) return false; Index: lib/Transforms/Scalar/SROA.cpp =================================================================== --- lib/Transforms/Scalar/SROA.cpp +++ lib/Transforms/Scalar/SROA.cpp @@ -2684,8 +2684,7 @@ assert(!IsSplit); assert(NewBeginOffset == BeginOffset); II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType())); - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment(ConstantInt::get(CstTy, getSliceAlign())); + II.setDestAlignment(getSliceAlign()); deleteIfTriviallyDead(OldPtr); return false; @@ -2801,15 +2800,15 @@ // update both source and dest of a single call. if (!IsSplittable) { Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType()); - if (IsDest) + if (IsDest) { II.setDest(AdjustedPtr); - else + if (II.getDestAlignment() > SliceAlign) + II.setDestAlignment(MinAlign(II.getDestAlignment(), SliceAlign)); + } + else { II.setSource(AdjustedPtr); - - if (II.getAlignment() > SliceAlign) { - Type *CstTy = II.getAlignmentCst()->getType(); - II.setAlignment( - ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign))); + if (II.getSourceAlignment() > SliceAlign) + II.setSourceAlignment(MinAlign(II.getSourceAlignment(), SliceAlign)); } DEBUG(dbgs() << " to: " << II << "\n"); @@ -2862,8 +2861,10 @@ // Compute the relative offset for the other pointer within the transfer. unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS); APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset); - unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1, - OtherOffset.zextOrTrunc(64).getZExtValue()); + unsigned OtherAlign = + IsDest ? II.getSourceAlignment() : II.getDestAlignment(); + OtherAlign = MinAlign(OtherAlign ? 
OtherAlign : 1, + OtherOffset.zextOrTrunc(64).getZExtValue()); if (EmitMemCpy) { // Compute the other pointer, folding as much as possible to produce @@ -2876,8 +2877,9 @@ Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); CallInst *New = IRB.CreateMemCpy( - IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size, - MinAlign(SliceAlign, OtherAlign), II.isVolatile()); + IsDest ? OurPtr : OtherPtr, IsDest ? SliceAlign : OtherAlign, + IsDest ? OtherPtr : OurPtr, IsDest ? OtherAlign : SliceAlign, + Size, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return false; Index: lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- lib/Transforms/Utils/InlineFunction.cpp +++ lib/Transforms/Utils/InlineFunction.cpp @@ -1247,7 +1247,7 @@ // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. 
- Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); + Builder.CreateMemCpy(Dst, 1, Src, 1, Size); } /// When inlining a call site that has a byval argument, Index: lib/Transforms/Utils/LowerMemIntrinsics.cpp =================================================================== --- lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -409,8 +409,8 @@ /* SrcAddr */ Memcpy->getRawSource(), /* DstAddr */ Memcpy->getRawDest(), /* CopyLen */ CI, - /* SrcAlign */ Memcpy->getAlignment(), - /* DestAlign */ Memcpy->getAlignment(), + /* SrcAlign */ Memcpy->getSourceAlignment(), + /* DestAlign */ Memcpy->getDestAlignment(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), /* TargetTransformInfo */ TTI); @@ -419,8 +419,8 @@ /* SrcAddr */ Memcpy->getRawSource(), /* DstAddr */ Memcpy->getRawDest(), /* CopyLen */ Memcpy->getLength(), - /* SrcAlign */ Memcpy->getAlignment(), - /* DestAlign */ Memcpy->getAlignment(), + /* SrcAlign */ Memcpy->getSourceAlignment(), + /* DestAlign */ Memcpy->getDestAlignment(), /* SrcIsVolatile */ Memcpy->isVolatile(), /* DstIsVolatile */ Memcpy->isVolatile(), /* TargetTransfomrInfo */ TTI); @@ -432,8 +432,8 @@ /* SrcAddr */ Memmove->getRawSource(), /* DstAddr */ Memmove->getRawDest(), /* CopyLen */ Memmove->getLength(), - /* SrcAlign */ Memmove->getAlignment(), - /* DestAlign */ Memmove->getAlignment(), + /* SrcAlign */ Memmove->getSourceAlignment(), + /* DestAlign */ Memmove->getDestAlignment(), /* SrcIsVolatile */ Memmove->isVolatile(), /* DstIsVolatile */ Memmove->isVolatile()); } @@ -443,6 +443,6 @@ /* DstAddr */ Memset->getRawDest(), /* CopyLen */ Memset->getLength(), /* SetValue */ Memset->getValue(), - /* Alignment */ Memset->getAlignment(), + /* Alignment */ Memset->getDestAlignment(), Memset->isVolatile()); } Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- 
lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -156,9 +156,8 @@ // We have enough information to now generate the memcpy call to do the // concatenation for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(CpyDst, Src, - ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1), - 1); + B.CreateMemCpy(CpyDst, 1, Src, 1, + ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1)); return Dst; } @@ -346,8 +345,8 @@ // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, Src, - ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); + B.CreateMemCpy(Dst, 1, Src, 1, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len)); return Dst; } @@ -371,7 +370,7 @@ // We have enough information to now generate the memcpy call to do the // copy for us. Make a memcpy to copy the nul byte with align = 1. - B.CreateMemCpy(Dst, Src, LenV, 1); + B.CreateMemCpy(Dst, 1, Src, 1, LenV); return DstEnd; } @@ -408,7 +407,7 @@ Type *PT = Callee->getFunctionType()->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] - B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); + B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len)); return Dst; } @@ -817,15 +816,15 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + CI->getArgOperand(2)); return CI->getArgOperand(0); } Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + 
B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + CI->getArgOperand(2)); return CI->getArgOperand(0); } @@ -1831,10 +1830,10 @@ return nullptr; // we found a format specifier, bail out. // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - ConstantInt::get(DL.getIntPtrType(CI->getContext()), - FormatStr.size() + 1), - 1); // Copy the null byte. + B.CreateMemCpy( + CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + ConstantInt::get(DL.getIntPtrType(CI->getContext()), + FormatStr.size() + 1)); // Copy the null byte. return ConstantInt::get(CI->getType(), FormatStr.size()); } @@ -1868,7 +1867,7 @@ return nullptr; Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); + B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen); // The sprintf result is the unincremented number of bytes in the string. 
return B.CreateIntCast(Len, CI->getType(), false); @@ -2393,8 +2392,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI, IRBuilder<> &B) { if (isFortifiedCallFoldable(CI, 3, 2, false)) { - B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + CI->getArgOperand(2)); return CI->getArgOperand(0); } return nullptr; @@ -2403,8 +2402,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI, IRBuilder<> &B) { if (isFortifiedCallFoldable(CI, 3, 2, false)) { - B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), - CI->getArgOperand(2), 1); + B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, + CI->getArgOperand(2)); return CI->getArgOperand(0); } return nullptr; Index: test/Analysis/AliasSet/memtransfer.ll =================================================================== --- test/Analysis/AliasSet/memtransfer.ll +++ test/Analysis/AliasSet/memtransfer.ll @@ -14,7 +14,7 @@ %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -30,7 +30,7 @@ %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 true) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 true) store i8 1, i8* %b, align 1 ret void } @@ -46,7 +46,7 @@ %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -62,7 +62,7 @@ %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void 
@llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i32 1, i1 true) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 1, i1 true) store i8 1, i8* %b, align 1 ret void } @@ -76,7 +76,7 @@ %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -90,7 +90,7 @@ %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } @@ -104,11 +104,11 @@ %a = alloca i8, align 1 %b = alloca i8, align 1 store i8 1, i8* %a, align 1 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 1, i1 false) store i8 1, i8* %b, align 1 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) Index: test/Analysis/BasicAA/assume.ll =================================================================== --- test/Analysis/BasicAA/assume.ll +++ test/Analysis/BasicAA/assume.ll @@ -1,12 +1,12 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s target datalayout = 
"e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0 declare void @llvm.assume(i1) #0 define void @test1(i8* %P, i8* %Q) nounwind ssp { tail call void @llvm.assume(i1 true) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test1: @@ -14,10 +14,10 @@ ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.assume(i1 true) ; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.assume(i1 true) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.assume(i1 true) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.assume(i1 true) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.assume(i1 true) } attributes #0 = { nounwind } Index: test/Analysis/BasicAA/cs-cs.ll =================================================================== --- 
test/Analysis/BasicAA/cs-cs.ll +++ test/Analysis/BasicAA/cs-cs.ll @@ -2,48 +2,48 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "arm-apple-ios" -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #0 -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0 +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0 declare void @a_readonly_func(i8*) #1 declare void @a_writeonly_func(i8*) #2 define void @test2(i8* %P, i8* %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; 
CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2a(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2a: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* 
%Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2b(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 12 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2b: @@ -51,20 +51,20 @@ ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: 
Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2c(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 11 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2c: @@ -72,20 +72,20 @@ ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: 
Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2d(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 -12 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2d: @@ -93,20 +93,20 @@ ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 
12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test2e(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) %R = getelementptr i8, i8* %P, i64 -11 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test2e: @@ -114,67 +114,67 @@ ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: 
Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %R, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test3(i8* %P, i8* %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test3: ; CHECK: MayAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void @test3a(i8* noalias %P, i8* noalias %Q) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 
false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test3a: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i1 false) } define void @test4(i8* %P, 
i8* noalias %Q) #3 { - tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test4: ; CHECK: NoAlias: i8* %P, i8* %Q -; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false) +; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) +; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void 
@llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i1 false) } define void @test5(i8* %P, i8* %Q, i8* %R) #3 { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test5: @@ -182,19 +182,19 @@ ; CHECK: MayAlias: i8* %P, i8* %Q ; CHECK: MayAlias: i8* %P, i8* %R ; CHECK: MayAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %R <-> tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Both ModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test5a(i8* noalias %P, i8* noalias %Q, i8* noalias %R) nounwind ssp { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test5a: @@ -202,27 +202,27 @@ ; CHECK: NoAlias: i8* %P, i8* %Q ; CHECK: NoAlias: i8* %P, i8* %R ; CHECK: NoAlias: i8* %Q, i8* %R -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %R <-> tail call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: NoModRef: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %R <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i1 false) <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) } define void @test6(i8* %P) #3 { - call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) call void @a_readonly_func(i8* %P) ret void ; CHECK-LABEL: Function: test6: -; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) +; CHECK: Just Mod (MustAlias): Ptr: i8* %P <-> call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) ; CHECK: Just Ref: Ptr: 
i8* %P <-> call void @a_readonly_func(i8* %P) -; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) <-> call void @a_readonly_func(i8* %P) -; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) +; CHECK: Just Mod: call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) <-> call void @a_readonly_func(i8* %P) +; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void @llvm.memset.p0i8.i64(i8* align 8 %P, i8 -51, i64 32, i1 false) } define void @test7(i8* %P) #3 { Index: test/Analysis/BasicAA/gep-and-alias.ll =================================================================== --- test/Analysis/BasicAA/gep-and-alias.ll +++ test/Analysis/BasicAA/gep-and-alias.ll @@ -6,13 +6,13 @@ ; The load and store address in the loop body could alias so the load ; can't be hoisted above the store and out of the loop. -declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) define i32 @foo(i32 %x, i32 %z, i32 %n) { entry: %pool = alloca [59 x i32], align 4 %tmp = bitcast [59 x i32]* %pool to i8* - call void @llvm.memset.p0i8.i32(i8* nonnull %tmp, i8 0, i32 236, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 nonnull %tmp, i8 0, i32 236, i1 false) %cmp3 = icmp eq i32 %n, 0 br i1 %cmp3, label %for.end, label %for.body.lr.ph Index: test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll =================================================================== --- test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll +++ test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll @@ -12,15 +12,15 @@ ret void } -; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) -; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void 
@llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind @A = external global i8 @B = external global i8 define void @test1() { - call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) - call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i1 false) ret void } Index: test/Analysis/BasicAA/guards.ll =================================================================== --- test/Analysis/BasicAA/guards.ll +++ test/Analysis/BasicAA/guards.ll @@ -1,23 +1,23 @@ ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #0 declare void @llvm.experimental.guard(i1, ...) declare void @unknown_but_readonly() readonly define void @test1(i8* %P, i8* %Q) { tail call void(i1,...) @llvm.experimental.guard(i1 true) [ "deopt"() ] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) ret void ; CHECK-LABEL: Function: test1: ; CHECK: Just Ref: Ptr: i8* %P <-> tail call void (i1, ...) 
@llvm.experimental.guard(i1 true) [ "deopt"() ] ; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] -; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) -; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <-> tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] +; CHECK: Just Mod: Ptr: i8* %P <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: Ptr: i8* %Q <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Ref: tail call void (i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ] <-> tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) +; CHECK: Just Mod: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i1 false) <-> tail call void (i1, ...) 
@llvm.experimental.guard(i1 true) [ "deopt"() ] } define void @test2() { Index: test/Analysis/BasicAA/modref.ll =================================================================== --- test/Analysis/BasicAA/modref.ll +++ test/Analysis/BasicAA/modref.ll @@ -11,7 +11,7 @@ store i32 0, i32* %A - call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i1 false) %B = load i32, i32* %A ret i32 %B @@ -27,7 +27,7 @@ store i8 2, i8* %B ;; Not written to by memcpy - call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i1 false) %C = load i8, i8* %B ret i8 %C @@ -38,7 +38,7 @@ ; CHECK-LABEL: @test2 %P2 = getelementptr i8, i8* %P, i32 127 store i8 1, i8* %P2 ;; Not dead across memset - call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false) + call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false) %A = load i8, i8* %P2 ret i8 %A ; CHECK: ret i8 1 @@ -51,7 +51,7 @@ ;; FIXME: DSE isn't zapping this dead store. store i8 1, i8* %P2 ;; Dead, clobbered by memset. - call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false) + call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i1 false) %A = load i8, i8* %P2 ret i8 %A ; CHECK-NOT: load @@ -91,7 +91,7 @@ define i32 @test4(i8* %P) { %tmp = load i32, i32* @G1 - call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i1 false) %tmp2 = load i32, i32* @G1 %sub = sub i32 %tmp2, %tmp ret i32 %sub @@ -106,7 +106,7 @@ ; write to G1. 
define i32 @test5(i8* %P, i32 %Len) { %tmp = load i32, i32* @G1 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i1 false) %tmp2 = load i32, i32* @G1 %sub = sub i32 %tmp2, %tmp ret i32 %sub @@ -227,7 +227,7 @@ ; CHECK: ret i32 0 } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind Index: test/Analysis/CFLAliasAnalysis/Andersen/assign.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/assign.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/assign.ll @@ -21,4 +21,4 @@ %d = bitcast i64* %b to i32* %e = select i1 %cond, i32* %c, i32* %d ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/assign2.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/assign2.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/assign2.ll @@ -20,4 +20,4 @@ %d = bitcast i64* %a to i32* %e = select i1 %cond, i32* %c, i32* %b ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-deref-escape.ll =================================================================== --- 
test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-deref-escape.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-deref-escape.ll @@ -30,4 +30,4 @@ %c = load i32*, i32** %x ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-escape.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-escape.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-arg-escape.ll @@ -28,4 +28,4 @@ %d = load i32*, i32** %x ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-arg.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-arg.ll @@ -23,4 +23,4 @@ %c = call i32* @return_arg_callee(i32* %a, i32* %b) ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg-multilevel.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg-multilevel.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg-multilevel.ll @@ -49,4 +49,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-deref-arg.ll @@ -34,4 +34,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-escape.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-escape.ll +++ 
test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-escape.ll @@ -30,4 +30,4 @@ %d = load i32*, i32** %x ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg-multilevel.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg-multilevel.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg-multilevel.ll @@ -50,4 +50,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-ref-arg.ll @@ -34,4 +34,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-unknown.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-unknown.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-ret-unknown.ll @@ -35,4 +35,4 @@ %b = load i32*, i32** %a ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-multilevel.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-multilevel.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-multilevel.ll @@ -42,4 +42,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-unknown.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-unknown.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg-unknown.ll @@ -29,4 +29,4 @@ %lp = load i32*, 
i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/interproc-store-arg.ll @@ -37,4 +37,4 @@ %lq = load i32*, i32** %q ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Andersen/memalias.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Andersen/memalias.ll +++ test/Analysis/CFLAliasAnalysis/Andersen/memalias.ll @@ -18,4 +18,4 @@ %c = bitcast i64** %b to i32** %d = load i32*, i32** %c ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-arg-deref-escape.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-arg-deref-escape.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-arg-deref-escape.ll @@ -30,4 +30,4 @@ %c = load i32*, i32** %x ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-arg-escape.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-arg-escape.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-arg-escape.ll @@ -28,4 +28,4 @@ %d = load i32*, i32** %x ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-arg.ll @@ -21,4 +21,4 @@ %c = call i32* @return_arg_callee(i32* %a, i32* %b) ret void -} \ No newline at end of file +} Index: 
test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-deref-arg-multilevel.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-deref-arg-multilevel.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-deref-arg-multilevel.ll @@ -43,4 +43,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-deref-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-deref-arg.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-deref-arg.ll @@ -27,4 +27,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-escape.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-escape.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-escape.ll @@ -30,4 +30,4 @@ %d = load i32*, i32** %x ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-ref-arg-multilevel.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-ref-arg-multilevel.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-ref-arg-multilevel.ll @@ -48,4 +48,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-ref-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-ref-arg.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-ref-arg.ll @@ -33,4 +33,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: 
test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-unknown.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-unknown.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-ret-unknown.ll @@ -35,4 +35,4 @@ %b = load i32*, i32** %a ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-unknown.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-unknown.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg-unknown.ll @@ -29,4 +29,4 @@ %lp = load i32*, i32** %p ret void -} \ No newline at end of file +} Index: test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg.ll =================================================================== --- test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg.ll +++ test/Analysis/CFLAliasAnalysis/Steensgaard/interproc-store-arg.ll @@ -33,4 +33,4 @@ %lq = load i32*, i32** %q ret void -} \ No newline at end of file +} Index: test/Analysis/CallGraph/no-intrinsics.ll =================================================================== --- test/Analysis/CallGraph/no-intrinsics.ll +++ test/Analysis/CallGraph/no-intrinsics.ll @@ -3,10 +3,10 @@ ; Check that intrinsics aren't added to the call graph -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define void @f(i8* %out, i8* %in) { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %in, i32 100, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %out, i8* align 4 %in, i32 100, i1 false) ret void } Index: test/Analysis/ConstantFolding/gep-constanfolding-error.ll =================================================================== --- test/Analysis/ConstantFolding/gep-constanfolding-error.ll +++ 
test/Analysis/ConstantFolding/gep-constanfolding-error.ll @@ -43,10 +43,10 @@ %scevgep = getelementptr [6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i32 0, i32 %5, i32 %8 %9 = add i32 %f.promoted, %smax %10 = add i32 %9, 2 - call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i32 1, i1 false) -; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %scevgep, i8 %conv6, i32 %10, i1 false) +; CHECK: call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i32 0, i{{32|64}} 5, i{{32|64}} 4, i32 1), i8 %conv6, i32 1, i1 false) ; CHECK-NOT: call void @llvm.memset.p0i8.i32(i8* getelementptr ([6 x [6 x [7 x i8]]], [6 x [6 x [7 x i8]]]* @j, i64 1, i64 4, i64 4, i32 1) ret i32 0 } ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) Index: test/Analysis/DependenceAnalysis/Preliminary.ll =================================================================== --- test/Analysis/DependenceAnalysis/Preliminary.ll +++ test/Analysis/DependenceAnalysis/Preliminary.ll @@ -696,4 +696,4 @@ ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: test/Analysis/GlobalsModRef/memset-escape.ll =================================================================== --- test/Analysis/GlobalsModRef/memset-escape.ll +++ test/Analysis/GlobalsModRef/memset-escape.ll @@ -22,7 +22,7 @@ %c = alloca [1 x i32], align 4 store i32 0, i32* %retval, align 4 %0 = bitcast [1 x i32]* %c to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 4, i32 4, i1 false) + call void 
@llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 4, i1 false) store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4 store i32 0, i32* @b, align 4 br label %for.cond @@ -59,7 +59,7 @@ } ; Function Attrs: nounwind argmemonly -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind argmemonly ; Function Attrs: noreturn nounwind declare void @abort() noreturn nounwind Index: test/Analysis/GlobalsModRef/no-escape.ll =================================================================== --- test/Analysis/GlobalsModRef/no-escape.ll +++ test/Analysis/GlobalsModRef/no-escape.ll @@ -59,7 +59,7 @@ } ; Function Attrs: nounwind argmemonly -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind argmemonly +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind argmemonly ; Function Attrs: noreturn nounwind declare void @abort() noreturn nounwind Index: test/Analysis/GlobalsModRef/pr12351.ll =================================================================== --- test/Analysis/GlobalsModRef/pr12351.ll +++ test/Analysis/GlobalsModRef/pr12351.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -basicaa -globals-aa -gvn -S -disable-verify | FileCheck %s -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define void @foo(i8* %x, i8* %y) { - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i32 1, i1 false); + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i1 false); ret void } Index: test/Analysis/GlobalsModRef/volatile-instrs.ll =================================================================== --- test/Analysis/GlobalsModRef/volatile-instrs.ll +++ test/Analysis/GlobalsModRef/volatile-instrs.ll @@ -10,7 +10,7 @@ @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 declare i32 @printf(i8* nocapture, ...) 
nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind ; Make sure that the initial memcpy call does not go away @@ -21,10 +21,10 @@ define i32 @main() nounwind uwtable ssp { main_entry: - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.anon* @b to i8*), i8* align 4 bitcast (%struct.anon* @a to i8*), i64 12, i1 false) %0 = load volatile i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @b, i64 0, i32 0), align 4 store i32 %0, i32* @c, align 4 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast (%struct.anon* @b to i8*), i8* align 4 bitcast (%struct.anon* @a to i8*), i64 12, i1 false) nounwind %call = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind ret i32 0 } Index: test/Analysis/Lint/address-spaces.ll =================================================================== --- test/Analysis/Lint/address-spaces.ll +++ test/Analysis/Lint/address-spaces.ll @@ -22,4 +22,4 @@ define <4 x i32 addrspace(1)*> @test2_vector(<4 x i64> %x) nounwind { %y = inttoptr <4 x i64> %x to <4 x i32 addrspace(1)*> ret <4 x i32 addrspace(1)*> %y -} \ No newline at end of file +} Index: test/Analysis/Lint/noalias-byval.ll =================================================================== --- test/Analysis/Lint/noalias-byval.ll +++ test/Analysis/Lint/noalias-byval.ll @@ -3,10 +3,10 @@ %s = type { i8 } ; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #0 ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i1) #0 declare void @f1(%s* noalias nocapture sret, %s* nocapture readnone) @@ -16,7 +16,7 @@ %tmp = alloca %s %0 = bitcast %s* %c to i8* %1 = bitcast %s* %tmp to i8* - call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false) call void @f1(%s* sret %c, %s* %c) ret void } @@ -34,7 +34,7 @@ %tmp = alloca %s %0 = bitcast %s* %c to i8* %1 = bitcast %s* %tmp to i8* - call void @llvm.memset.p0i8.i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i1 false) call void @f3(%s* sret %c, %s* byval %c) ret void } Index: test/Analysis/MemorySSA/basicaa-memcpy.ll =================================================================== --- test/Analysis/MemorySSA/basicaa-memcpy.ll +++ 
test/Analysis/MemorySSA/basicaa-memcpy.ll @@ -1,16 +1,16 @@ ; RUN: opt -disable-output -basicaa -print-memoryssa %s 2>&1 | FileCheck %s -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @source_clobber(i8* %a, i8* %b) { ; CHECK-LABEL: @source_clobber( ; CHECK-NEXT: ; 1 = MemoryDef(liveOnEntry) -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false) ; CHECK-NEXT: ; MemoryUse(liveOnEntry) ; CHECK-NEXT: [[X:%.*]] = load i8, i8* %b ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 128, i1 false) %x = load i8, i8* %b ret void } Index: test/Analysis/PostDominators/pr24415.ll =================================================================== --- test/Analysis/PostDominators/pr24415.ll +++ test/Analysis/PostDominators/pr24415.ll @@ -15,4 +15,4 @@ ; CHECK-NEXT: [1] <> ; CHECK-NEXT: [2] %2 ; CHECK-NEXT: [2] %1 -; CHECK-NEXT: [3] %0 \ No newline at end of file +; CHECK-NEXT: [3] %0 Index: test/Analysis/PostDominators/pr6047_b.ll =================================================================== --- test/Analysis/PostDominators/pr6047_b.ll +++ test/Analysis/PostDominators/pr6047_b.ll @@ -22,4 +22,4 @@ ; CHECK-NEXT: [3] %bb35.loopexit3 ; CHECK-NEXT: [2] %a ; CHECK-NEXT: [2] %entry -; CHECK-NEXT: [2] %bb3.i \ No newline at end of file +; CHECK-NEXT: [2] %bb3.i Index: test/Analysis/PostDominators/pr6047_c.ll =================================================================== --- test/Analysis/PostDominators/pr6047_c.ll +++ test/Analysis/PostDominators/pr6047_c.ll @@ -194,4 +194,4 @@ ; CHECK-NEXT: [3] %bb35.loopexit3 ; CHECK-NEXT: [2] %entry ; CHECK-NEXT: [2] %bb3.i -; CHECK-NEXT: Roots: %bb35 %bb3.i 
\ No newline at end of file +; CHECK-NEXT: Roots: %bb35 %bb3.i Index: test/Analysis/PostDominators/pr6047_d.ll =================================================================== --- test/Analysis/PostDominators/pr6047_d.ll +++ test/Analysis/PostDominators/pr6047_d.ll @@ -29,4 +29,4 @@ ; CHECK-NEXT: [3] %a ; CHECK-NEXT: [3] %entry ; CHECK-NEXT: [3] %b -; CHECK-NEXT: [2] %bb3.i \ No newline at end of file +; CHECK-NEXT: [2] %bb3.i Index: test/Analysis/RegionInfo/infinite_loop.ll =================================================================== --- test/Analysis/RegionInfo/infinite_loop.ll +++ test/Analysis/RegionInfo/infinite_loop.ll @@ -16,4 +16,4 @@ } ; CHECK-NOT: => ; CHECK: [0] 0 => -; STAT: 1 region - The # of regions \ No newline at end of file +; STAT: 1 region - The # of regions Index: test/Analysis/RegionInfo/infinite_loop_4.ll =================================================================== --- test/Analysis/RegionInfo/infinite_loop_4.ll +++ test/Analysis/RegionInfo/infinite_loop_4.ll @@ -48,4 +48,4 @@ ; BBIT: 5, 11, 12, ; RNIT: 0, 7, 1, 2 => 10, 10, 8, 9, 13, 14, 3, 4, ; RNIT: 2, 5 => 6, 6, -; RNIT: 5, 11, 12, \ No newline at end of file +; RNIT: 5, 11, 12, Index: test/Analysis/ScalarEvolution/avoid-assume-hang.ll =================================================================== --- test/Analysis/ScalarEvolution/avoid-assume-hang.ll +++ test/Analysis/ScalarEvolution/avoid-assume-hang.ll @@ -136,4 +136,4 @@ exit: ret void -} \ No newline at end of file +} Index: test/Analysis/ScalarEvolution/avoid-smax-1.ll =================================================================== --- test/Analysis/ScalarEvolution/avoid-smax-1.ll +++ test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -172,7 +172,7 @@ %55 = mul i32 %y.21, %w ; [#uses=1] %.sum5 = add i32 %55, %.sum3 ; [#uses=1] %56 = getelementptr i8, i8* %j, i32 %.sum5 ; [#uses=1] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i32 1, i1 false) + tail call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i1 false) %57 = add i32 %y.21, 1 ; [#uses=2] br label %bb24 @@ -189,7 +189,7 @@ %60 = getelementptr i8, i8* %j, i32 %.sum4 ; [#uses=1] %61 = mul i32 %x, %w ; [#uses=1] %62 = sdiv i32 %61, 2 ; [#uses=1] - tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i1 false) ret void bb29: ; preds = %bb20, %entry @@ -207,7 +207,7 @@ %67 = getelementptr i8, i8* %r, i32 %66 ; [#uses=1] %68 = mul i32 %y.310, %w ; [#uses=1] %69 = getelementptr i8, i8* %j, i32 %68 ; [#uses=1] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i1 false) %70 = add i32 %y.310, 1 ; [#uses=2] br label %bb31 @@ -223,12 +223,12 @@ %73 = getelementptr i8, i8* %j, i32 %72 ; [#uses=1] %74 = mul i32 %x, %w ; [#uses=1] %75 = sdiv i32 %74, 2 ; [#uses=1] - tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i1 false) ret void return: ; preds = %bb20 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind Index: test/Analysis/ScalarEvolution/trip-count.ll =================================================================== --- test/Analysis/ScalarEvolution/trip-count.ll +++ test/Analysis/ScalarEvolution/trip-count.ll @@ -41,7 +41,7 @@ entry: %bins = alloca [16 x i64], align 16 %0 = bitcast [16 x i64]* %bins to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 128, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 %0, i8 0, i64 128, i1 false) br label %preheader 
preheader: ; preds = %for.inc.1, %entry @@ -88,7 +88,7 @@ } ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0 declare void @may_exit() nounwind Index: test/Analysis/ScalarEvolution/trip-count3.ll =================================================================== --- test/Analysis/ScalarEvolution/trip-count3.ll +++ test/Analysis/ScalarEvolution/trip-count3.ll @@ -50,7 +50,7 @@ bb2.i: ; preds = %bb3.i %1 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3 %2 = bitcast [16 x i32]* %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i1 false) %3 = getelementptr %struct.SHA_INFO, %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0 %4 = bitcast i32* %3 to i8* br label %codeRepl @@ -74,7 +74,7 @@ declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll =================================================================== --- test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -45,13 +45,13 @@ ; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) #4 { define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !1 ret void } ; CHECK: define void @test2_no(i8* nocapture %p, 
i8* nocapture readonly %q, i64 %n) #3 { define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i1 false), !tbaa !2 ret void } @@ -70,7 +70,7 @@ } declare void @callee(i32* %p) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind ; CHECK: attributes #0 = { norecurse nounwind readnone } ; CHECK: attributes #1 = { norecurse nounwind } Index: test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll =================================================================== --- test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll +++ test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -6,17 +6,17 @@ ; it has a TBAA tag which declares that it is unrelated. ; CHECK: @foo -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0 +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %p, i8* align 1 %q, i64 16, i1 false), !tbaa !0 ; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa [[TAGA:!.*]] ; CHECK-NEXT: ret void define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind { - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i1 false), !tbaa !2 store i8 2, i8* %s, align 1, !tbaa !1 - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i1 false), !tbaa !2 ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind ; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0} ; CHECK: [[TYPEA]] = !{!"A", !{{.*}}} Index: 
test/Bindings/llvm-c/invoke.ll =================================================================== --- test/Bindings/llvm-c/invoke.ll +++ test/Bindings/llvm-c/invoke.ll @@ -80,4 +80,4 @@ declare i32 @llvm.eh.typeid.for(i8*) #1 attributes #0 = { noreturn } -attributes #1 = { nounwind readnone } \ No newline at end of file +attributes #1 = { nounwind readnone } Index: test/Bitcode/constantsTest.3.2.ll =================================================================== --- test/Bitcode/constantsTest.3.2.ll +++ test/Bitcode/constantsTest.3.2.ll @@ -121,4 +121,4 @@ insertvalue { i32, float } { i32 1, float 2.0 }, i32 0, 0 ret void -} \ No newline at end of file +} Index: test/Bitcode/standardCIntrinsic.3.2.ll =================================================================== --- test/Bitcode/standardCIntrinsic.3.2.ll +++ test/Bitcode/standardCIntrinsic.3.2.ll @@ -7,10 +7,10 @@ define void @memcpyintrinsic(i8* %dest, i8* %src, i32 %len) { entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 %len, i1 true) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 true) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 %align, i1 %isvolatile) \ No newline at end of file +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) Index: test/Bitcode/upgrade-memory-intrinsics.ll =================================================================== --- /dev/null +++ test/Bitcode/upgrade-memory-intrinsics.ll @@ -0,0 +1,36 @@ +; RUN: opt -S < %s | FileCheck %s + +; Test to ensure that calls to the memcpy/memmove/memset intrinsics are auto-upgraded +; to remove the alignment parameter in favour of align attributes on the pointer args. 
+ +; Make sure a non-zero alignment is propagated +define void @test(i8* %p1, i8* %p2, i8* %p3) { +; CHECK-LABEL: @test +; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %p1, i8 55, i64 100, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p1, i8* align 4 %p2, i64 50, i1 false) +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %p2, i8* align 4 %p3, i64 1000, i1 false) + call void @llvm.memset.p0i8.i64(i8* %p1, i8 55, i64 100, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 50, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %p2, i8* %p3, i64 1000, i32 4, i1 false) + ret void +} + +; Make sure that a zero alignment is handled properly +define void @test2(i8* %p1, i8* %p2, i8* %p3) { +; CHECK-LABEL: @test2 +; CHECK: call void @llvm.memset.p0i8.i64(i8* %p1, i8 55, i64 100, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 50, i1 false) +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* %p2, i8* %p3, i64 1000, i1 false) + call void @llvm.memset.p0i8.i64(i8* %p1, i8 55, i64 100, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p1, i8* %p2, i64 50, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %p2, i8* %p3, i64 1000, i32 0, i1 false) + ret void +} + +; CHECK: declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) +; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) +; CHECK: declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) + Index: test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll =================================================================== ---
test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -1147,7 +1147,7 @@ ret void()* @allocai64 } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) define void @test_memcpy(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memcpy ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 @@ -1157,11 +1157,11 @@ ; CHECK: %x1 = COPY [[SRC]] ; CHECK: %x2 = COPY [[SIZE]] ; CHECK: BL $memcpy, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } -declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32 %align, i1 %volatile) +declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i1) define void @test_memmove(i8* %dst, i8* %src, i64 %size) { ; CHECK-LABEL: name: test_memmove ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 @@ -1171,11 +1171,11 @@ ; CHECK: %x1 = COPY [[SRC]] ; CHECK: %x2 = COPY [[SIZE]] ; CHECK: BL $memmove, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2 - call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i32 1, i1 0) + call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %size, i1 0) ret void } -declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32 %align, i1 %volatile) +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) define void @test_memset(i8* %dst, i8 %val, i64 %size) { ; CHECK-LABEL: name: test_memset ; CHECK: [[DST:%[0-9]+]]:_(p0) = COPY %x0 @@ -1187,7 +1187,7 @@ ; CHECK: %w1 = COPY [[SRC_TMP]] ; CHECK: %x2 = COPY [[SIZE]] ; CHECK: BL $memset, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %w1, implicit %x2 - call void @llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i32 1, i1 0) + call void 
@llvm.memset.p0i8.i64(i8* %dst, i8 %val, i64 %size, i1 0) ret void } Index: test/CodeGen/AArch64/PBQP-csr.ll =================================================================== --- test/CodeGen/AArch64/PBQP-csr.ll +++ test/CodeGen/AArch64/PBQP-csr.ll @@ -22,7 +22,7 @@ %z.i60 = getelementptr inbounds %rs, %rs* %r, i64 0, i32 9, i32 2 %na = getelementptr inbounds %rs, %rs* %r, i64 0, i32 0 %0 = bitcast double* %x.i to i8* - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 72, i1 false) %1 = load i32, i32* %na, align 4 %cmp70 = icmp sgt i32 %1, 0 br i1 %cmp70, label %for.body.lr.ph, label %for.end @@ -87,5 +87,5 @@ } ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) Index: test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll =================================================================== --- test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll +++ test/CodeGen/AArch64/aarch64-DAGCombine-findBetterNeighborChains-crash.ll @@ -6,13 +6,13 @@ declare void @extern(i8*) ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #0 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #0 ; Function Attrs: nounwind define void @func(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 { bb: %tmp = getelementptr inbounds i8, i8* %arg2, i64 88 - tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false) store i8 0, i8* %arg3 store i8 2, i8* %arg2 store float 0.000000e+00, float* %arg @@ -27,7 +27,7 @@ define void @func2(float* noalias %arg, i32* noalias %arg1, i8* noalias %arg2, i8* noalias %arg3) #1 { bb: %tmp = getelementptr inbounds i8, i8* 
%arg2, i64 88 - tail call void @llvm.memset.p0i8.i64(i8* noalias %arg2, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 noalias %arg2, i8 0, i64 40, i1 false) store i8 0, i8* %arg3 store i8 2, i8* %arg2 store float 0.000000e+00, float* %arg Index: test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll =================================================================== --- test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll +++ test/CodeGen/AArch64/arm64-2012-05-07-MemcpyAlignBug.ll @@ -14,8 +14,8 @@ ; CHECK-NEXT: str [[VAL2]], [x0] define void @foo(i8* %a) { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([3 x i32]* @b to i8*), i64 12, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a, i8* align 4 bitcast ([3 x i32]* @b to i8*), i64 12, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: test/CodeGen/AArch64/arm64-abi-varargs.ll =================================================================== --- test/CodeGen/AArch64/arm64-abi-varargs.ll +++ test/CodeGen/AArch64/arm64-abi-varargs.ll @@ -164,10 +164,10 @@ %4 = bitcast i8* %ap.align to %struct.s41* %5 = bitcast %struct.s41* %vs to i8* %6 = bitcast %struct.s41* %4 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %5, i8* %6, i64 16, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %5, i8* align 16 %6, i64 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind define void @bar2(i32 %x, i128 %s41.coerce) nounwind { entry: Index: test/CodeGen/AArch64/arm64-abi_align.ll =================================================================== --- test/CodeGen/AArch64/arm64-abi_align.ll +++ 
test/CodeGen/AArch64/arm64-abi_align.ll @@ -300,14 +300,14 @@ %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 %0 = bitcast %struct.s42* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s42* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4 %call = call i32 @f42(i32 3, %struct.s42* %tmp, %struct.s42* %tmp1) #5 ret i32 %call } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #4 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) #4 declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, %struct.s42* nocapture %s1, @@ -346,9 +346,9 @@ %tmp = alloca %struct.s42, align 4 %tmp1 = alloca %struct.s42, align 4 %0 = bitcast %struct.s42* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s42* @g42 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast (%struct.s42* @g42 to i8*), i64 24, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s42* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s42* @g42_2 to i8*), i64 24, i32 4, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast (%struct.s42* @g42_2 to i8*), i64 24, i1 false), !tbaa.struct !4 %call = call i32 @f42_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s42* %tmp, %struct.s42* %tmp1) #5 ret i32 %call @@ -414,9 
+414,9 @@ %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s43* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4 %call = call i32 @f43(i32 3, %struct.s43* %tmp, %struct.s43* %tmp1) #5 ret i32 %call } @@ -465,9 +465,9 @@ %tmp = alloca %struct.s43, align 16 %tmp1 = alloca %struct.s43, align 16 %0 = bitcast %struct.s43* %tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.s43* @g43 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast (%struct.s43* @g43 to i8*), i64 32, i1 false), !tbaa.struct !4 %1 = bitcast %struct.s43* %tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast (%struct.s43* @g43_2 to i8*), i64 32, i32 16, i1 false), !tbaa.struct !4 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 bitcast (%struct.s43* @g43_2 to i8*), i64 32, i1 false), !tbaa.struct !4 %call = call i32 @f43_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, %struct.s43* %tmp, %struct.s43* %tmp1) #5 ret i32 %call Index: test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll =================================================================== --- test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll +++ test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll @@ -11,11 +11,11 @@ ; ARM64: mov x2, #80 ; ARM64: uxtb w1, w9 ; ARM64: bl _memset - call void 
@llvm.memset.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i32 16, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i8 0, i64 80, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) define void @t2() { ; ARM64-LABEL: t2 @@ -25,11 +25,11 @@ ; ARM64: add x1, x8, _message@PAGEOFF ; ARM64: mov x2, #80 ; ARM64: bl _memcpy - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 80, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) define void @t3() { ; ARM64-LABEL: t3 @@ -39,11 +39,11 @@ ; ARM64: add x1, x8, _message@PAGEOFF ; ARM64: mov x2, #20 ; ARM64: bl _memmove - call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i32 16, i1 false) + call void @llvm.memmove.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 20, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) define void @t4() { ; ARM64-LABEL: t4 @@ -58,7 +58,7 @@ ; ARM64: ldrb w11, [x9, #16] 
; ARM64: strb w11, [x8, #16] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 16, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 16 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false) ret void } @@ -75,7 +75,7 @@ ; ARM64: ldrb w11, [x9, #16] ; ARM64: strb w11, [x8, #16] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 8 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 17, i1 false) ret void } @@ -92,7 +92,7 @@ ; ARM64: ldrb w10, [x9, #8] ; ARM64: strb w10, [x8, #8] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 4 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 9, i1 false) ret void } @@ -111,7 +111,7 @@ ; ARM64: ldrb w10, [x9, #6] ; ARM64: strb w10, [x8, #6] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 2 getelementptr inbounds 
([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 7, i1 false) ret void } @@ -130,7 +130,7 @@ ; ARM64: ldrb w10, [x9, #3] ; ARM64: strb w10, [x8, #3] ; ARM64: ret - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @temp, i32 0, i32 0), i8* align 1 getelementptr inbounds ([80 x i8], [80 x i8]* @message, i32 0, i32 0), i64 4, i1 false) ret void } @@ -143,6 +143,6 @@ ; ARM64: strb [[BYTE]], [x0] %array = alloca i8, i32 8192 %elem = getelementptr i8, i8* %array, i32 8000 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %elem, i64 1, i1 false) ret void } Index: test/CodeGen/AArch64/arm64-memcpy-inline.ll =================================================================== --- test/CodeGen/AArch64/arm64-memcpy-inline.ll +++ test/CodeGen/AArch64/arm64-memcpy-inline.ll @@ -22,7 +22,7 @@ ; CHECK: strh [[REG1]], [x[[BASEREG2]], #8] ; CHECK: ldr [[REG2:x[0-9]+]], ; CHECK: str [[REG2]], - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) ret i32 0 } @@ -33,7 +33,7 @@ ; CHECK: stur [[DEST]], [x0, #15] ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] ; CHECK: str [[DEST]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* 
getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false) ret void } @@ -45,7 +45,7 @@ ; CHECK: str [[REG3]], [x0, #32] ; CHECK: ldp [[DEST1:q[0-9]+]], [[DEST2:q[0-9]+]], [x{{[0-9]+}}] ; CHECK: stp [[DEST1]], [[DEST2]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false) ret void } @@ -56,7 +56,7 @@ ; CHECK: str [[REG4]], [x0, #16] ; CHECK: ldr [[DEST:q[0-9]+]], [x[[BASEREG]]] ; CHECK: str [[DEST]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false) ret void } @@ -67,7 +67,7 @@ ; CHECK: strh [[REG5]], [x0, #16] ; CHECK: ldr [[REG6:q[0-9]+]], [x{{[0-9]+}}] ; CHECK: str [[REG6]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false) ret void } @@ -80,7 +80,7 @@ ; CHECK: mov [[REG8:w[0-9]+]], ; CHECK: movk [[REG8]], ; CHECK: str [[REG8]], [x0] - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false) ret void } @@ -91,7 +91,7 @@ ; CHECK: stur [[REG9]], [x{{[0-9]+}}, #6] ; CHECK: ldr ; CHECK: str - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* 
@spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false) ret void } @@ -104,9 +104,9 @@ ; CHECK: str [[REG10]], [x0] %0 = bitcast %struct.Foo* %a to i8* %1 = bitcast %struct.Foo* %b to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: test/CodeGen/AArch64/arm64-memset-inline.ll =================================================================== --- test/CodeGen/AArch64/arm64-memset-inline.ll +++ test/CodeGen/AArch64/arm64-memset-inline.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: t1: ; CHECK: str wzr, [x0, #8] ; CHECK: str xzr, [x0] - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false) ret void } @@ -17,11 +17,11 @@ ; CHECK: str xzr, [sp, #8] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false) call void @something(i8* %0) nounwind ret void } declare void @something(i8*) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, 
i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind Index: test/CodeGen/AArch64/arm64-memset-to-bzero.ll =================================================================== --- test/CodeGen/AArch64/arm64-memset-to-bzero.ll +++ test/CodeGen/AArch64/arm64-memset-to-bzero.ll @@ -10,11 +10,11 @@ ; CHECK-LINUX: {{b|bl}} memset define void @fct1(i8* nocapture %ptr) { entry: - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) ; CHECK-LABEL: fct2: ; When the size is bigger than 256, change into bzero. @@ -22,7 +22,7 @@ ; CHECK-LINUX: {{b|bl}} memset define void @fct2(i8* nocapture %ptr) { entry: - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false) ret void } @@ -33,7 +33,7 @@ define void @fct3(i8* nocapture %ptr, i32 %unknown) { entry: %conv = sext i32 %unknown to i64 - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false) ret void } Index: test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll =================================================================== --- test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll +++ test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll @@ -7,8 +7,8 @@ ; CHECK: orr w2, wzr, #0x10 ; CHECK-NEXT: bl _memcpy entry: - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out, i8* %in, i64 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare 
void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) Index: test/CodeGen/AArch64/arm64-misched-basic-A53.ll =================================================================== --- test/CodeGen/AArch64/arm64-misched-basic-A53.ll +++ test/CodeGen/AArch64/arm64-misched-basic-A53.ll @@ -26,9 +26,9 @@ %yy = alloca i32, align 4 store i32 0, i32* %retval %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) store i32 0, i32* %xx, align 4 store i32 0, i32* %yy, align 4 store i32 0, i32* %i, align 4 @@ -105,7 +105,7 @@ } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } Index: test/CodeGen/AArch64/arm64-misched-basic-A57.ll =================================================================== --- test/CodeGen/AArch64/arm64-misched-basic-A57.ll +++ test/CodeGen/AArch64/arm64-misched-basic-A57.ll @@ -32,9 +32,9 @@ %yy = alloca i32, align 4 store i32 0, i32* %retval %0 = bitcast [8 x i32]* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([8 x i32]* @main.x to i8*), i64 32, i32 4, i1 false) + call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 bitcast ([8 x i32]* @main.x to i8*), i64 32, i1 false) %1 = bitcast [8 x i32]* %y to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([8 x i32]* @main.y to i8*), i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %1, i8* align 4 bitcast ([8 x i32]* @main.y to i8*), i64 32, i1 false) store i32 0, i32* %xx, align 4 store i32 0, i32* %yy, align 4 store i32 0, i32* %i, align 4 @@ -106,7 +106,7 @@ ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } Index: test/CodeGen/AArch64/arm64-stur.ll =================================================================== --- test/CodeGen/AArch64/arm64-stur.ll +++ test/CodeGen/AArch64/arm64-stur.ll @@ -55,11 +55,11 @@ ; CHECK-NEXT: ret %B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1 %val = bitcast i64* %B to i8* - call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %val, i8 0, i64 16, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; Unaligned 16b stores are split into 8b stores for performance. 
; radar://15424193 Index: test/CodeGen/AArch64/arm64-virtual_base.ll =================================================================== --- test/CodeGen/AArch64/arm64-virtual_base.ll +++ test/CodeGen/AArch64/arm64-virtual_base.ll @@ -43,9 +43,9 @@ %tmp14 = bitcast double* %arraydecay5.3.1 to i8* %arraydecay11.3.1 = getelementptr inbounds %struct.Bicubic_Patch_Struct, %struct.Bicubic_Patch_Struct* %Shape, i64 0, i32 12, i64 1, i64 3, i64 0 %tmp15 = bitcast double* %arraydecay11.3.1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i1 false) ret void } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) Index: test/CodeGen/AArch64/fast-isel-memcpy.ll =================================================================== --- test/CodeGen/AArch64/fast-isel-memcpy.ll +++ test/CodeGen/AArch64/fast-isel-memcpy.ll @@ -8,8 +8,8 @@ define void @test(i64 %a, i8* %b) { %1 = and i64 %a, 9223372036854775807 %2 = inttoptr i64 %1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %b, i64 8, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) Index: test/CodeGen/AArch64/func-argpassing.ll =================================================================== --- test/CodeGen/AArch64/func-argpassing.ll +++ test/CodeGen/AArch64/func-argpassing.ll @@ -186,11 +186,11 @@ ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1) define i32 @test_extern() { ; CHECK-LABEL: test_extern: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 undef, i32 4, i1 0) + 
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 undef, i32 undef, i1 0) ; CHECK: bl memcpy ret i32 0 } Index: test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll =================================================================== --- test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll +++ test/CodeGen/AArch64/ldp-stp-scaled-unscaled-pairs.ll @@ -115,11 +115,11 @@ %C = getelementptr inbounds [12 x i8], [12 x i8]* %a2, i64 0, i64 4 %1 = bitcast i8* %C to i64* store i64 0, i64* %1, align 4 - call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 8, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) attributes #0 = { nounwind } Index: test/CodeGen/AArch64/ldst-paired-aliasing.ll =================================================================== --- test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -5,7 +5,7 @@ declare void @f(i8*, i8*) declare void @f2(i8*, i8*) declare void @_Z5setupv() -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3 define i32 @main() local_unnamed_addr #1 { ; Make sure the stores happen in the correct order (the exact instructions could change). 
@@ -24,7 +24,7 @@ tail call void @_Z5setupv() %x2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 6 %x3 = bitcast i32* %x2 to i8* - call void @llvm.memset.p0i8.i64(i8* %x3, i8 0, i64 16, i32 8, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 8 %x3, i8 0, i64 16, i1 false) %arraydecay2 = getelementptr inbounds [10 x i32], [10 x i32]* %b1, i64 0, i64 0 %x4 = bitcast [10 x i32]* %b1 to <4 x i32>* store <4 x i32> , <4 x i32>* %x4, align 16 Index: test/CodeGen/AArch64/ldst-zero.ll =================================================================== --- test/CodeGen/AArch64/ldst-zero.ll +++ test/CodeGen/AArch64/ldst-zero.ll @@ -3,7 +3,7 @@ ; Tests to check that zero stores which are generated as STP xzr, xzr aren't ; scheduled incorrectly due to incorrect alias information -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) %struct.tree_common = type { i8*, i8*, i32 } ; Original test case which exhibited the bug @@ -14,7 +14,7 @@ ; CHECK-DAG: str xzr, [x0] entry: %0 = bitcast %struct.tree_common* %t to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false) %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2 store i32 %code, i32* %code1, align 8 %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1 Index: test/CodeGen/AArch64/machine-combiner-madd.ll =================================================================== --- test/CodeGen/AArch64/machine-combiner-madd.ll +++ test/CodeGen/AArch64/machine-combiner-madd.ll @@ -19,7 +19,7 @@ %class.D = type { %class.basic_string.base, [4 x i8] } %class.basic_string.base = type <{ i64, i64, i32 }> @a = global %class.D* zeroinitializer, align 8 -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) 
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) define internal void @fun() section ".text.startup" { entry: %tmp.i.i = alloca %class.D, align 8 @@ -31,7 +31,7 @@ %x = load %class.D*, %class.D** getelementptr inbounds (%class.D*, %class.D** @a, i64 0), align 8 %arrayidx.i.i.i = getelementptr inbounds %class.D, %class.D* %x, i64 %conv11.i.i %d = bitcast %class.D* %arrayidx.i.i.i to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %y, i8* %d, i64 24, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 nonnull %y, i8* align 8 %d, i64 24, i1 false) %inc.i.i = add i64 %i, 1 %cmp.i.i = icmp slt i64 %inc.i.i, 0 br i1 %cmp.i.i, label %loop, label %exit Index: test/CodeGen/AArch64/memcpy-f128.ll =================================================================== --- test/CodeGen/AArch64/memcpy-f128.ll +++ test/CodeGen/AArch64/memcpy-f128.ll @@ -12,8 +12,8 @@ ; CHECK: str q0 ; CHECK: ret entry: - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* bitcast (%structA* @stubA to i8*), i64 48, i32 8, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 undef, i8* align 8 bitcast (%structA* @stubA to i8*), i64 48, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) Index: test/CodeGen/AArch64/merge-store-dependency.ll =================================================================== --- test/CodeGen/AArch64/merge-store-dependency.ll +++ test/CodeGen/AArch64/merge-store-dependency.ll @@ -14,7 +14,7 @@ ; A53: str [[DATA]], {{.*}} %0 = bitcast %struct1* %fde to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false) %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4 store i16 256, i16* %state, align 8 %fd1 = 
getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2 @@ -58,6 +58,6 @@ ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) declare i32 @fcntl(i32, i32, ...) declare noalias i8* @foo() Index: test/CodeGen/AArch64/mergestores_noimplicitfloat.ll =================================================================== --- test/CodeGen/AArch64/mergestores_noimplicitfloat.ll +++ test/CodeGen/AArch64/mergestores_noimplicitfloat.ll @@ -16,8 +16,8 @@ ; CHECK-DAG: str [[R3]], [x0, #24] define void @pr33475(i8* %p0, i8* %p1) noimplicitfloat { - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p0, i8* %p1, i64 32, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %p0, i8* align 4 %p1, i64 32, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) Index: test/CodeGen/AArch64/misched-stp.ll =================================================================== --- test/CodeGen/AArch64/misched-stp.ll +++ test/CodeGen/AArch64/misched-stp.ll @@ -30,7 +30,7 @@ ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) %struct.tree_common = type { i8*, i8*, i32 } ; CHECK-LABEL: test_zero @@ -41,7 +41,7 @@ define void @test_zero(%struct.tree_common* %t, i32 %code, i8* %type) { entry: %0 = bitcast %struct.tree_common* %t to i8* - tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 24, i32 8, i1 false) + tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 24, i1 false) %code1 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 2 store i32 %code, i32* %code1, align 8 %type2 = getelementptr inbounds %struct.tree_common, %struct.tree_common* %t, i64 0, i32 1 Index: test/CodeGen/AArch64/pr33172.ll =================================================================== --- 
test/CodeGen/AArch64/pr33172.ll +++ test/CodeGen/AArch64/pr33172.ll @@ -21,12 +21,12 @@ %wide.load8291059.4 = load i64, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.b, i64 0, i64 18) to i64*), align 8 store i64 %wide.load8281058.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 16) to i64*), align 8 store i64 %wide.load8291059.4, i64* bitcast (float* getelementptr inbounds ([200 x float], [200 x float]* @main.x, i64 0, i64 18) to i64*), align 8 - tail call void @llvm.memset.p0i8.i64(i8* bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i32 8, i1 false) #2 + tail call void @llvm.memset.p0i8.i64(i8* align 8 bitcast ([200 x float]* @main.b to i8*), i8 0, i64 undef, i1 false) #2 unreachable } ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1 +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1 attributes #1 = { argmemonly nounwind } attributes #2 = { nounwind } Index: test/CodeGen/AArch64/tailcall-mem-intrinsics.ll =================================================================== --- test/CodeGen/AArch64/tailcall-mem-intrinsics.ll +++ test/CodeGen/AArch64/tailcall-mem-intrinsics.ll @@ -4,7 +4,7 @@ ; CHECK: b memcpy define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -12,7 +12,7 @@ ; CHECK: b memmove define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret void } @@ -20,12 +20,12 @@ ; CHECK: b memset define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { entry: - tail 
call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 attributes #0 = { nounwind } Index: test/CodeGen/AArch64/tailcall-string-rvo.ll =================================================================== --- test/CodeGen/AArch64/tailcall-string-rvo.ll +++ test/CodeGen/AArch64/tailcall-string-rvo.ll @@ -32,7 +32,7 @@ %tmp1 = bitcast %class.basic_string.11.42.73* %arg to %union.anon.8.39.70** store %union.anon.8.39.70* %tmp, %union.anon.8.39.70** %tmp1, align 8 %tmp2 = bitcast %union.anon.8.39.70* %tmp to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* nonnull undef, i64 13, i1 false) %tmp3 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 1 store i64 13, i64* %tmp3, align 8 %tmp4 = getelementptr inbounds %class.basic_string.11.42.73, %class.basic_string.11.42.73* %arg, i64 0, i32 0, i32 0, i32 2, i32 1, i64 5 @@ -42,6 +42,6 @@ } ; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0 attributes #0 = { argmemonly nounwind } Index: test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll 
=================================================================== --- test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll +++ test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll @@ -1,6 +1,6 @@ ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s -declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0 @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4 @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 @@ -68,7 +68,7 @@ ; HSA: @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #2 define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(i32 addrspace(1)* %out) #1 { - call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* %out, i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i32 4, i1 false) + call void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* align 4 %out, i32 addrspace(4)* align 4 getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), i32 32, i1 false) ret void } Index: test/CodeGen/AMDGPU/lds-alignment.ll =================================================================== --- test/CodeGen/AMDGPU/lds-alignment.ll +++ test/CodeGen/AMDGPU/lds-alignment.ll @@ -9,16 +9,16 @@ @lds.missing.align.0 = internal unnamed_addr addrspace(3) global [39 x i32] undef @lds.missing.align.1 = internal unnamed_addr addrspace(3) global [7 x i64] undef -declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture 
readonly, i32, i32, i1) #0 +declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #0 +declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #0 ; HSA-LABEL: {{^}}test_no_round_size_1: ; HSA: workgroup_group_segment_byte_size = 38 define amdgpu_kernel void @test_no_round_size_1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -36,12 +36,12 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) %lds.align16.1.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.1.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 
false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.1.bc, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.1.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.1.bc, i32 38, i1 false) ret void } @@ -52,12 +52,12 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_2_align_8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -67,11 +67,11 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_local_lds_and_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* 
@lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.align16.0.bc, i8 addrspace(1)* align 4 %in, i32 38, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.align16.0.bc, i32 38, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false) ret void } @@ -79,8 +79,8 @@ ; HSA: workgroup_group_segment_byte_size = 0 ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* %lds.arg) #1 { - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.arg, i8 addrspace(1)* align 4 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.arg, i32 38, i1 false) ret void } @@ -89,8 +89,8 @@ ; HSA: workgroup_group_segment_byte_size = 0 ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_high_align_lds_arg(i8 addrspace(1)* %out, i8 addrspace(1)* %in, i8 addrspace(3)* align 64 %lds.arg) #1 { - call void 
@llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.arg, i8 addrspace(1)* %in, i32 38, i32 64, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.arg, i32 38, i32 64, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 64 %lds.arg, i8 addrspace(1)* align 64 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 64 %out, i8 addrspace(3)* align 64 %lds.arg, i32 38, i1 false) ret void } @@ -100,12 +100,12 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_missing_alignment_size_2_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false) %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false) ret void } @@ -116,12 +116,12 @@ ; HSA: group_segment_alignment = 4 define 
amdgpu_kernel void @test_missing_alignment_size_2_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.missing.align.1.bc = bitcast [7 x i64] addrspace(3)* @lds.missing.align.1 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.1.bc, i8 addrspace(1)* %in, i32 56, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.1.bc, i32 56, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i8 addrspace(1)* align 8 %in, i32 56, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.missing.align.1.bc, i32 56, i1 false) %lds.missing.align.0.bc = bitcast [39 x i32] addrspace(3)* @lds.missing.align.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.missing.align.0.bc, i8 addrspace(1)* %in, i32 160, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.missing.align.0.bc, i32 160, i32 4, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i8 addrspace(1)* align 4 %in, i32 160, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out, i8 addrspace(3)* align 4 %lds.missing.align.0.bc, i32 160, i1 false) ret void } @@ -144,16 +144,16 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order0(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 
false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -165,16 +165,16 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 
addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -186,16 +186,16 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 
8 %lds.align16.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) ret void } @@ -207,16 +207,16 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order3(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) 
%lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) ret void } @@ -228,16 +228,16 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order4(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* 
@lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) ret void } @@ -249,16 +249,16 @@ ; HSA: group_segment_alignment = 4 define amdgpu_kernel void @test_round_size_3_order5(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { %lds.align8.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align8.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align8.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align8.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align8.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align8.0.bc, i32 38, i1 false) %lds.align16.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align16.0 to i8 addrspace(3)* - call void 
@llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align16.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align16.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align16.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align16.0.bc, i32 38, i1 false) %lds.align32.0.bc = bitcast [38 x i8] addrspace(3)* @lds.align32.0 to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %lds.align32.0.bc, i8 addrspace(1)* %in, i32 38, i32 8, i1 false) - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out, i8 addrspace(3)* %lds.align32.0.bc, i32 38, i32 8, i1 false) + call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 8 %lds.align32.0.bc, i8 addrspace(1)* align 8 %in, i32 38, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 8 %out, i8 addrspace(3)* align 8 %lds.align32.0.bc, i32 38, i1 false) ret void } Index: test/CodeGen/AMDGPU/llvm.memcpy.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.memcpy.ll +++ test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind -declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 
addrspace(3)* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i1) nounwind +declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture, i64, i1) nounwind ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1: @@ -83,7 +83,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i1 false) nounwind ret void } @@ -128,7 +128,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %bcout, i8 addrspace(3)* align 2 %bcin, i32 32, i1 false) nounwind ret void } @@ -147,7 +147,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %bcout, i8 addrspace(3)* align 4 %bcin, i32 32, i1 false) nounwind ret void } @@ -164,7 +164,7 @@ define amdgpu_kernel void 
@test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)* %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)* - call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 8 %bcout, i8 addrspace(3)* align 8 %bcin, i32 32, i1 false) nounwind ret void } @@ -241,7 +241,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i1 false) nounwind ret void } @@ -284,7 +284,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %bcout, i8 addrspace(1)* align 2 %bcin, i64 32, i1 false) nounwind ret void } @@ -297,7 +297,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind + call void 
@llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %bcout, i8 addrspace(1)* align 4 %bcin, i64 32, i1 false) nounwind ret void } @@ -310,7 +310,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 8 %bcout, i8 addrspace(1)* align 8 %bcin, i64 32, i1 false) nounwind ret void } @@ -323,7 +323,7 @@ define amdgpu_kernel void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind { %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)* %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 16 %bcout, i8 addrspace(1)* align 16 %bcin, i64 32, i1 false) nounwind ret void } @@ -342,7 +342,7 @@ ; SI-DAG: buffer_store_dwordx4 define amdgpu_kernel void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind { %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 4, i1 false) + call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %out, i8 addrspace(2)* align 4 %str, i64 32, i1 false) ret void } @@ -367,6 +367,6 @@ ; SI: buffer_store_byte define amdgpu_kernel void @test_memcpy_const_string_align1(i8 addrspace(1)* noalias %out) nounwind { %str = bitcast [16 x i8] addrspace(2)* @hello.align1 to i8 addrspace(2)* - call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, 
i64 32, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i1 false) ret void } Index: test/CodeGen/AMDGPU/lower-mem-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/lower-mem-intrinsics.ll +++ test/CodeGen/AMDGPU/lower-mem-intrinsics.ll @@ -1,16 +1,16 @@ ; RUN: opt -S -amdgpu-lower-intrinsics %s | FileCheck -check-prefix=OPT %s -declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 -declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i32, i1) #1 -declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i32, i1) #1 +declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1 ; Test the upper bound for sizes to leave ; OPT-LABEL: @max_size_small_static_memcpy_caller0( -; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret 
void } @@ -26,14 +26,14 @@ ; OPT-NEXT: [[T5:%[0-9]+]] = icmp ult i64 [[T4]], 1025 ; OPT-NEXT: br i1 [[T5]], label %load-store-loop, label %memcpy-split define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } ; OPT-LABEL: @max_size_small_static_memmove_caller0( -; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i32 1, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } @@ -44,14 +44,14 @@ ; OPT: getelementptr ; OPT-NEXT: store i8 define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { - call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i32 1, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } ; OPT-LABEL: @max_size_small_static_memset_caller0( -; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) +; OPT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i32 1, i1 false) + call void 
@llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) ret void } @@ -60,7 +60,7 @@ ; OPT: getelementptr ; OPT: store i8 define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i32 1, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false) ret void } @@ -68,7 +68,7 @@ ; OPT-NOT: call ; OPT: phi define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false) ret void } @@ -76,7 +76,7 @@ ; OPT-NOT: call ; OPT: phi define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false) ret void } @@ -87,8 +87,8 @@ ; OPT: phi ; OPT-NOT: call define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false) ret void } @@ -99,7 +99,7 @@ ; OPT: getelementptr inbounds i8, i8 addrspace(1)* ; OPT: store i8 define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, 
i32 %n) #0 { - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false) ret void } @@ -110,10 +110,10 @@ ; OPT: getelementptr inbounds i8, i8 addrspace(1)* ; OPT: store i8 -; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) +; OPT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false) define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 { - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i32 1, i1 false) - call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i32 1, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false) ret void } Index: test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll =================================================================== --- test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll +++ test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll @@ -1,52 +1,52 @@ ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -amdgpu-promote-alloca < %s | FileCheck %s -declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, i1) #0 -declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 +declare void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 -declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i32, 
i1) #0 -declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) #0 +declare void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* nocapture, i8* nocapture, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1 ; CHECK-LABEL: @promote_with_memcpy( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) -; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) +; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) define amdgpu_kernel void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memcpy.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) - call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) + call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) ret void } ; CHECK-LABEL: @promote_with_memmove( ; CHECK: 
getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) -; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) +; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %out.bc, i8 addrspace(3)* align 4 %alloca.bc, i32 68, i1 false) define amdgpu_kernel void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memmove.p0i8.p1i8.i32(i8* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false) - call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* %out.bc, i8* %alloca.bc, i32 68, i32 4, i1 false) + call void @llvm.memmove.p0i8.p1i8.i32(i8* align 4 %alloca.bc, i8 addrspace(1)* align 4 %in.bc, i32 68, i1 false) + call void @llvm.memmove.p1i8.p0i8.i32(i8 addrspace(1)* align 4 %out.bc, i8* align 4 %alloca.bc, i32 68, i1 false) ret void } ; CHECK-LABEL: @promote_with_memset( ; CHECK: getelementptr inbounds [64 x [17 x i32]], [64 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}} -; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false) +; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* align 4 %alloca.bc, i8 7, i32 68, i1 false) define amdgpu_kernel void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %alloca = alloca [17 x i32], align 4 %alloca.bc = bitcast [17 x i32]* %alloca to i8* %in.bc = bitcast i32 
addrspace(1)* %in to i8 addrspace(1)* %out.bc = bitcast i32 addrspace(1)* %out to i8 addrspace(1)* - call void @llvm.memset.p0i8.i32(i8* %alloca.bc, i8 7, i32 68, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %alloca.bc, i8 7, i32 68, i1 false) ret void } Index: test/CodeGen/AMDGPU/stack-size-overflow.ll =================================================================== --- test/CodeGen/AMDGPU/stack-size-overflow.ll +++ test/CodeGen/AMDGPU/stack-size-overflow.ll @@ -1,7 +1,7 @@ ; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck -check-prefix=ERROR %s ; RUN: not llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #1 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #1 ; ERROR: error: stack size limit exceeded (4294967296) in stack_size_limit ; GCN: ; ScratchSize: 4294967296 @@ -9,6 +9,6 @@ entry: %alloca = alloca [1073741823 x i32], align 4 %bc = bitcast [1073741823 x i32]* %alloca to i8* - call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i32 1, i1 true) + call void @llvm.memset.p0i8.i32(i8* %bc, i8 9, i32 1073741823, i1 true) ret void } Index: test/CodeGen/ARM/2009-03-07-SpillerBug.ll =================================================================== --- test/CodeGen/ARM/2009-03-07-SpillerBug.ll +++ test/CodeGen/ARM/2009-03-07-SpillerBug.ll @@ -59,7 +59,7 @@ %34 = fadd double %31, 0.000000e+00 %35 = fadd double %32, 0.000000e+00 %36 = bitcast %struct.ggPoint3* %x to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 null, i8* align 4 %36, i32 24, i1 false) store double %33, double* null, align 8 br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i @@ -76,4 +76,4 @@ ret i32 0 } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, 
i8* nocapture, i32, i1) nounwind Index: test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll =================================================================== --- test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll +++ test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll @@ -16,7 +16,7 @@ bb1: ; preds = %entry %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind - call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 undef, i8 0, i32 40, i1 false) %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0 store %struct.mo* undef, %struct.mo** %1, align 4 %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5 @@ -40,7 +40,7 @@ declare %struct.ui* @vn_pp_to_ui(i32*) -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind declare i32 @mo_create_nnm(%struct.mo*, i64, i32**) Index: test/CodeGen/ARM/2011-10-26-memset-inline.ll =================================================================== --- test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ test/CodeGen/ARM/2011-10-26-memset-inline.ll @@ -14,8 +14,8 @@ ; CHECK-UNALIGNED: str define void @foo(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false) + call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind Index: test/CodeGen/ARM/2011-10-26-memset-with-neon.ll =================================================================== --- test/CodeGen/ARM/2011-10-26-memset-with-neon.ll +++ test/CodeGen/ARM/2011-10-26-memset-with-neon.ll @@ -5,8 +5,8 @@ ; CHECK: vst1.64 define void @f_0_40(i8* nocapture %c) nounwind optsize { entry: - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false) + call void 
@llvm.memset.p0i8.i64(i8* align 16 %c, i8 0, i64 40, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind Index: test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll =================================================================== --- test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll +++ test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll @@ -19,7 +19,7 @@ declare i8* @__cxa_begin_catch(i8*) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind declare void @__cxa_end_catch() Index: test/CodeGen/ARM/Windows/memset.ll =================================================================== --- test/CodeGen/ARM/Windows/memset.ll +++ test/CodeGen/ARM/Windows/memset.ll @@ -2,11 +2,11 @@ @source = common global [512 x i8] zeroinitializer, align 4 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @function() { entry: - call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* bitcast ([512 x i8]* @source to i8*), i8 0, i32 512, i1 false) unreachable } Index: test/CodeGen/ARM/Windows/no-aeabi.ll =================================================================== --- test/CodeGen/ARM/Windows/no-aeabi.ll +++ test/CodeGen/ARM/Windows/no-aeabi.ll @@ -1,14 +1,14 @@ ; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -verify-machineinstrs -o - %s | FileCheck %s -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind 
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind @source = common global [512 x i8] zeroinitializer, align 4 @target = common global [512 x i8] zeroinitializer, align 4 define void @move() nounwind { entry: - call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } @@ -16,7 +16,7 @@ define void @copy() nounwind { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([512 x i8]* @target to i8*), i8* bitcast ([512 x i8]* @source to i8*), i32 512, i1 false) unreachable } Index: test/CodeGen/ARM/arm-eabi.ll =================================================================== --- test/CodeGen/ARM/arm-eabi.ll +++ test/CodeGen/ARM/arm-eabi.ll @@ -39,7 +39,7 @@ %4 = bitcast %struct.my_s* %3 to i8* ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %4, i8* inttoptr (i32 1 to i8*), i32 72, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %4, i8* align 4 inttoptr (i32 1 to i8*), i32 72, i1 false) ret void } @@ -50,22 +50,22 @@ ; memmove ; CHECK-EABI: bl __aeabi_memmove ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; memcpy ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; memset ; CHECK-EABI: mov r2, #1 ; CHECK-EABI: bl __aeabi_memset ; CHECK-GNUEABI: mov 
r1, #1 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind Index: test/CodeGen/ARM/constantpool-promote-ldrh.ll =================================================================== --- test/CodeGen/ARM/constantpool-promote-ldrh.ll +++ test/CodeGen/ARM/constantpool-promote-ldrh.ll @@ -12,10 +12,10 @@ ; CHECK: ldrh r{{[0-9]+}}, {{\[}}[[base]]] define hidden i32 @fn1() #0 { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 undef, i8* align 2 bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i1 false) ret i32 undef } ; Function Attrs: argmemonly nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) attributes #0 = { "target-features"="+strict-align" } Index: test/CodeGen/ARM/constantpool-promote.ll =================================================================== --- test/CodeGen/ARM/constantpool-promote.ll +++ test/CodeGen/ARM/constantpool-promote.ll @@ -120,7 +120,7 @@ entry: %a = alloca [4 x i16], align 2 %0 = bitcast [4 x i16]* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i32 2, i1 false) + call 
void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %0, i8* align 2 bitcast ([4 x i16]* @fn1.a to i8*), i32 8, i1 false) ret void } @@ -128,7 +128,7 @@ entry: %a = alloca [8 x i8], align 2 %0 = bitcast [8 x i8]* %a to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([8 x i8]* @fn2.a to i8*), i32 16, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([8 x i8]* @fn2.a to i8*), i32 16, i1 false) ret void } @@ -156,7 +156,7 @@ ; CHECK-V7: [[x]]: ; CHECK-V7: .asciz "s\000\000" define void @test10(i8* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 %a, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0), i32 1, i1 false) ret void } @@ -174,16 +174,16 @@ ; CHECK-V7ARM: .short 3 ; CHECK-V7ARM: .short 4 define void @test11(i16* %a) local_unnamed_addr #0 { - call void @llvm.memmove.p0i16.p0i16.i32(i16* %a, i16* getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i32 2, i1 false) + call void @llvm.memmove.p0i16.p0i16.i32(i16* align 2 %a, i16* align 2 getelementptr inbounds ([2 x i16], [2 x i16]* @.arr1, i32 0, i32 0), i32 2, i1 false) ret void } declare void @b(i8*) #1 declare void @c(i16*) #1 -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32, i1) -declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) local_unnamed_addr -declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i32, i1) local_unnamed_addr +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i1) local_unnamed_addr +declare void @llvm.memmove.p0i16.p0i16.i32(i16*, i16*, i32, i1) local_unnamed_addr attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" 
"no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/CodeGen/ARM/crash-O0.ll =================================================================== --- test/CodeGen/ARM/crash-O0.ll +++ test/CodeGen/ARM/crash-O0.ll @@ -12,7 +12,7 @@ } @.str523 = private constant [256 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4 ; <[256 x i8]*> [#uses=1] -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind ; This function uses the scavenger for an ADDri instruction. ; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit. 
@@ -21,8 +21,8 @@ %letter = alloca i8 ; [#uses=0] %prodvers = alloca [256 x i8] ; <[256 x i8]*> [#uses=1] %buildver = alloca [256 x i8] ; <[256 x i8]*> [#uses=0] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 1 getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false) %prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; [#uses=1] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %prodvers2, i8* align 1 getelementptr inbounds ([256 x i8], [256 x i8]* @.str523, i32 0, i32 0), i32 256, i1 false) unreachable } Index: test/CodeGen/ARM/debug-info-blocks.ll =================================================================== --- test/CodeGen/ARM/debug-info-blocks.ll +++ test/CodeGen/ARM/debug-info-blocks.ll @@ -35,7 +35,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp !dbg !23 { %1 = alloca %0*, align 4 @@ -77,7 +77,7 @@ %24 = bitcast i8* %23 to %struct.CR*, !dbg !143 %25 = bitcast %struct.CR* %24 to i8*, !dbg !143 %26 = bitcast %struct.CR* %data to i8*, !dbg !143 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 16, i1 false), !dbg !143 %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144 %28 = load %3*, %3** %27, align 4, !dbg 
!144 %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144 @@ -86,7 +86,7 @@ %32 = bitcast i8* %31 to %struct.CR*, !dbg !144 %33 = bitcast %struct.CR* %32 to i8*, !dbg !144 %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %33, i8* align 4 %34, i32 16, i1 false), !dbg !144 %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145 %36 = load %3*, %3** %35, align 4, !dbg !145 %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145 Index: test/CodeGen/ARM/dyn-stackalloc.ll =================================================================== --- test/CodeGen/ARM/dyn-stackalloc.ll +++ test/CodeGen/ARM/dyn-stackalloc.ll @@ -51,7 +51,7 @@ %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag) %tmp6.len = call i32 @strlen(i8* %tmp6) %tmp6.indexed = getelementptr i8, i8* %tmp6, i32 %tmp6.len - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp6.indexed, i8* align 1 getelementptr inbounds ([2 x i8], [2 x i8]* @str215, i32 0, i32 0), i32 2, i1 false) %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents) call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6) ret void @@ -65,4 +65,4 @@ declare i8* @strcpy(i8*, i8*) -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind Index: test/CodeGen/ARM/fast-isel-intrinsic.ll =================================================================== --- test/CodeGen/ARM/fast-isel-intrinsic.ll +++ test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -44,11 +44,11 @@ ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memset.p0i8.i32(i8* 
getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i1 false) ret void } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM-LABEL: t2: @@ -93,11 +93,11 @@ ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @t3() nounwind ssp { ; ARM-LABEL: t3: @@ -141,7 +141,7 @@ ; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 - call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } @@ -173,11 +173,11 @@ ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* 
@temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind define void @t5() nounwind ssp { ; ARM-LABEL: t5: @@ -215,7 +215,7 @@ ; THUMB: ldrh r1, [r0, #24] ; THUMB: strh r1, [r0, #12] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } @@ -275,14 +275,14 @@ ; THUMB: ldrb r1, [r0, #25] ; THUMB: strb r1, [r0, #13] ; THUMB: bx lr - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void } ; rdar://13202135 define void @t7() nounwind ssp { ; Just make sure this doesn't assert when we have an odd length and an alignment of 2. 
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i32 2, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i1 false) ret void } Index: test/CodeGen/ARM/interval-update-remat.ll =================================================================== --- test/CodeGen/ARM/interval-update-remat.ll +++ test/CodeGen/ARM/interval-update-remat.ll @@ -85,7 +85,7 @@ if.end: ; preds = %_ZN7MessageD1Ev.exit33, %entry %message_.i.i = getelementptr inbounds %class.AssertionResult.24.249.299.1324.2349, %class.AssertionResult.24.249.299.1324.2349* %gtest_ar, i32 0, i32 1 %call.i.i.i = call %class.scoped_ptr.23.248.298.1323.2348* @_ZN10scoped_ptrI25Trans_NS___1_basic_stringIciiEED2Ev(%class.scoped_ptr.23.248.298.1323.2348* %message_.i.i) - call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 12, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 null, i8 0, i32 12, i1 false) call void @_ZN25Trans_NS___1_basic_stringIciiE5m_fn2Ev(%class.Trans_NS___1_basic_string.18.243.293.1318.2343* nonnull %ref.tmp) call void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* nonnull sret %agg.tmp16) %callback_.i = getelementptr inbounds %class.TestCompletionCallback.9.234.284.1309.2334, %class.TestCompletionCallback.9.234.284.1309.2334* %callback, i32 0, i32 1 @@ -137,7 +137,7 @@ declare void @_Z19CreateSOCKSv5Paramsv(%class.scoped_refptr.19.244.294.1319.2344* sret) ; Function Attrs: argmemonly nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 declare %class.BoundNetLog.20.245.295.1320.2345* @_ZN11BoundNetLogD1Ev(%class.BoundNetLog.20.245.295.1320.2345* returned) 
unnamed_addr Index: test/CodeGen/ARM/ldm-stm-base-materialization.ll =================================================================== --- test/CodeGen/ARM/ldm-stm-base-materialization.ll +++ test/CodeGen/ARM/ldm-stm-base-materialization.ll @@ -22,7 +22,7 @@ %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 24, i1 false) ret void } @@ -43,7 +43,7 @@ %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 28, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 28, i1 false) ret void } @@ -64,7 +64,7 @@ %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 32, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 32, i1 false) ret void } @@ -85,9 +85,9 @@ %2 = load i32*, i32** @b, align 4 %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1 %3 = bitcast i32* %arrayidx1 to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 36, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %3, i32 36, i1 false) ret void } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 Index: test/CodeGen/ARM/machine-cse-cmp.ll =================================================================== --- test/CodeGen/ARM/machine-cse-cmp.ll +++ test/CodeGen/ARM/machine-cse-cmp.ll @@ -37,14 
+37,14 @@ for.body.lr.ph: ; preds = %entry %1 = icmp sgt i32 %0, 1 %smax = select i1 %1, i32 %0, i32 1 - call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8], [250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i1 false) unreachable for.cond1.preheader: ; preds = %entry ret void } -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind ; rdar://12462006 define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind { Index: test/CodeGen/ARM/memcpy-inline.ll =================================================================== --- test/CodeGen/ARM/memcpy-inline.ll +++ test/CodeGen/ARM/memcpy-inline.ll @@ -23,7 +23,7 @@ ; CHECK-T1: strb [[TREG1]], ; CHECK-T1: ldrh [[TREG2:r[0-9]]], ; CHECK-T1: strh [[TREG2]] - call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @dst, i32 0, i32 0), i8* align 8 getelementptr inbounds (%struct.x, %struct.x* @src, i32 0, i32 0), i32 11, i1 false) ret i32 0 } @@ -37,7 +37,7 @@ ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] ; CHECK-T1-LABEL: t1: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i1 false) ret void } @@ -55,7 +55,7 @@ ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3] ; CHECK-T1-LABEL: t2: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* 
getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i1 false) ret void } @@ -68,7 +68,7 @@ ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0] ; CHECK-T1-LABEL: t3: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i1 false) ret void } @@ -80,7 +80,7 @@ ; CHECK: strh [[REG5:r[0-9]+]], [r0] ; CHECK-T1-LABEL: t4: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i1 false) ret void } @@ -96,7 +96,7 @@ ; CHECK: str [[REG7]] ; CHECK-T1-LABEL: t5: ; CHECK-T1: bl _memcpy - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i1 false) ret void } @@ -114,7 +114,7 @@ ; CHECK-T1: strh [[TREG5]], ; CHECK-T1: ldr [[TREG6:r[0-9]]], ; CHECK-T1: str [[TREG6]] - call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8], [512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str6, i64 0, i64 0), i64 14, i1 false) ret void } @@ -130,9 
+130,9 @@ ; CHECK-T1: str %0 = bitcast %struct.Foo* %a to i8* %1 = bitcast %struct.Foo* %b to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %0, i8* align 4 %1, i32 16, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind Index: test/CodeGen/ARM/memcpy-ldm-stm.ll =================================================================== --- test/CodeGen/ARM/memcpy-ldm-stm.ll +++ test/CodeGen/ARM/memcpy-ldm-stm.ll @@ -24,7 +24,7 @@ ; Think of the monstrosity '{{\[}}[[LB]]]' as '[ [[LB]] ]' without the spaces. ; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]]] ; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]]] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 17, i1 false) ret void } @@ -42,7 +42,7 @@ ; CHECK-NEXT: ldrb{{(\.w)?}} {{.*}}, {{\[}}[[LB]], #2] ; CHECK-NEXT: strb{{(\.w)?}} {{.*}}, {{\[}}[[SB]], #2] ; CHECK-NEXT: strh{{(\.w)?}} {{.*}}, {{\[}}[[SB]]] - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 bitcast ([64 x i32]* @s to i8*), i8* align 4 bitcast ([64 x i32]* @d to i8*), i32 15, i1 false) ret void } @@ -54,13 +54,13 @@ define void @t3() { call void @llvm.memcpy.p0i8.p0i8.i32( - i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), - i8* 
getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), - i32 24, i32 8, i1 false) + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), + i32 24, i1 false) call void @llvm.memcpy.p0i8.p0i8.i32( - i8* getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), - i8* getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), - i32 24, i32 8, i1 false) + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @copy, i32 0, i32 0), + i8* align 8 getelementptr inbounds (%struct.T, %struct.T* @etest, i32 0, i32 0), + i32 24, i1 false) ret void } @@ -70,7 +70,7 @@ define void @test3(%struct.S* %d, %struct.S* %s) #0 { %1 = bitcast %struct.S* %d to i8* %2 = bitcast %struct.S* %s to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %2, i32 48, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %1, i8* align 4 %2, i32 48, i1 false) ; 3 ldm/stm pairs in v6; 2 in v7 ; CHECK: ldm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1:{.*}]] ; CHECK: stm{{(\.w)?}} {{[rl0-9]+!?}}, [[REGLIST1]] @@ -91,4 +91,4 @@ attributes #0 = { "no-frame-pointer-elim"="true" } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 Index: test/CodeGen/ARM/memcpy-no-inline.ll =================================================================== --- test/CodeGen/ARM/memcpy-no-inline.ll +++ test/CodeGen/ARM/memcpy-no-inline.ll @@ -14,7 +14,7 @@ ; CHECK-NOT: ldm %mystring = alloca [31 x i8], align 1 %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 
getelementptr inbounds ([31 x i8], [31 x i8]* @.str, i32 0, i32 0), i32 31, i1 false) ret void } @@ -24,10 +24,10 @@ ; CHECK-NOT: __aeabi_memcpy %mystring = alloca [31 x i8], align 1 %0 = getelementptr inbounds [31 x i8], [31 x i8]* %mystring, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i32 0, i32 0), i32 21, i1 false) ret void } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 attributes #0 = { minsize noinline nounwind optsize } Index: test/CodeGen/ARM/memfunc.ll =================================================================== --- test/CodeGen/ARM/memfunc.ll +++ test/CodeGen/ARM/memfunc.ll @@ -16,13 +16,13 @@ ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i1 false) ; EABI memset swaps arguments ; CHECK-IOS: mov r1, #1 @@ -33,7 +33,7 @@ ; CHECK-EABI: bl __aeabi_memset ; CHECK-GNUEABI: mov r1, #1 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i1 false) ; EABI uses memclr if value set to 0 ; CHECK-IOS: mov r1, #0 @@ -42,7 +42,7 @@ ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr ; CHECK-GNUEABI: bl 
memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i1 false) ; EABI uses aligned function variants if possible @@ -50,49 +50,49 @@ ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove4 ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy4 ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 4, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memset4 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 1, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr4 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 4 %dest, i8 0, i32 500, i1 false) ; CHECK-IOS: bl _memmove ; CHECK-DARWIN: bl _memmove ; CHECK-EABI: bl __aeabi_memmove8 ; CHECK-GNUEABI: bl memmove - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false) ; CHECK-IOS: bl _memcpy ; CHECK-DARWIN: bl _memcpy ; CHECK-EABI: bl __aeabi_memcpy8 ; CHECK-GNUEABI: bl memcpy - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 500, i32 8, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memset8 ; 
CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 1, i32 500, i32 8, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 1, i32 500, i1 false) ; CHECK-IOS: bl _memset ; CHECK-DARWIN: bl _memset ; CHECK-EABI: bl __aeabi_memclr8 ; CHECK-GNUEABI: bl memset - call void @llvm.memset.p0i8.i32(i8* %dest, i8 0, i32 500, i32 8, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 8 %dest, i8 0, i32 500, i1 false) unreachable } @@ -113,7 +113,7 @@ ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = bitcast [9 x i8]* %arr0 to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: add r1, sp, #16 ; CHECK-IOS: bl _memcpy @@ -122,7 +122,7 @@ ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [9 x i8], align 1 %1 = bitcast [9 x i8]* %arr1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK-IOS: mov r0, sp ; CHECK-IOS: mov r1, #1 @@ -138,7 +138,7 @@ ; CHECK-GNUEABI: bl memset %arr2 = alloca [9 x i8], align 1 %2 = bitcast [9 x i8]* %arr2 to i8* - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -155,7 +155,7 @@ ; CHECK-GNUEABI: bl memmove %arr0 = alloca [7 x i8], align 1 %0 = bitcast [7 x i8]* %arr0 to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r1, sp, #10|sub(.w)? 
r1, r(7|11), #22}} ; CHECK-IOS: bl _memcpy @@ -164,7 +164,7 @@ ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [7 x i8], align 1 %1 = bitcast [7 x i8]* %arr1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r0, sp, #3|sub(.w)? r0, r(7|11), #29}} ; CHECK-IOS: mov r1, #1 @@ -177,7 +177,7 @@ ; CHECK-GNUEABI: bl memset %arr2 = alloca [7 x i8], align 1 %2 = bitcast [7 x i8]* %arr2 to i8* - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -194,7 +194,7 @@ ; CHECK-GNUEABI: bl memmove %arr0 = alloca [9 x i8], align 1 %0 = getelementptr inbounds [9 x i8], [9 x i8]* %arr0, i32 0, i32 4 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w) r., r(7|11), #26}} ; CHECK-IOS: bl _memcpy @@ -203,7 +203,7 @@ ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [9 x i8], align 1 %1 = getelementptr inbounds [9 x i8], [9 x i8]* %arr1, i32 0, i32 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? 
r., sp, #(1|5)|sub(.w) r., r(7|11), #35}} ; CHECK-IOS: mov r1, #1 @@ -216,7 +216,7 @@ ; CHECK-GNUEABI: bl memset %arr2 = alloca [9 x i8], align 1 %2 = getelementptr inbounds [9 x i8], [9 x i8]* %arr2, i32 0, i32 4 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -233,7 +233,7 @@ ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 1 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy @@ -242,7 +242,7 @@ ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 1 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 @@ -255,7 +255,7 @@ ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 1 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -272,7 +272,7 @@ ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 %i - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? 
r., r(7|11), #42}} ; CHECK-IOS: bl _memcpy @@ -281,7 +281,7 @@ ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 %i - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? r., r(7|11), #55}} ; CHECK-IOS: mov r1, #1 @@ -294,7 +294,7 @@ ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 %i - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -311,7 +311,7 @@ ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr [13 x i8], [13 x i8]* %arr0, i32 0, i32 4 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy @@ -320,7 +320,7 @@ ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr [13 x i8], [13 x i8]* %arr1, i32 0, i32 4 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? 
r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 @@ -333,7 +333,7 @@ ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr [13 x i8], [13 x i8]* %arr2, i32 0, i32 4 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -350,7 +350,7 @@ ; CHECK-GNUEABI: bl memmove %arr0 = alloca [13 x i8], align 1 %0 = getelementptr inbounds [13 x i8], [13 x i8]* %arr0, i32 0, i32 16 - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i32 0, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %0, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(10|14)|sub(.w)? r., r(7|11), #34}} ; CHECK-IOS: bl _memcpy @@ -359,7 +359,7 @@ ; CHECK-GNUEABI: bl memcpy %arr1 = alloca [13 x i8], align 1 %1 = getelementptr inbounds [13 x i8], [13 x i8]* %arr1, i32 0, i32 16 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i32 0, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %1, i32 %n, i1 false) ; CHECK: {{add(.w)? r., sp, #(1|5)|sub(.w)? 
r., r(7|11), #47}} ; CHECK-IOS: mov r1, #1 @@ -372,7 +372,7 @@ ; CHECK-GNUEABI: bl memset %arr2 = alloca [13 x i8], align 1 %2 = getelementptr inbounds [13 x i8], [13 x i8]* %arr2, i32 0, i32 16 - call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i32 0, i1 false) + call void @llvm.memset.p0i8.i32(i8* %2, i8 1, i32 %n, i1 false) unreachable } @@ -390,15 +390,15 @@ @arr9 = weak_odr global [128 x i8] undef define void @f9(i8* %dest, i32 %n) "no-frame-pointer-elim"="true" { entry: - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr8, i32 0, i32 0), i32 %n, i32 1, i1 false) - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr9, i32 0, i32 0), i32 %n, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr1, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* 
getelementptr inbounds ([8 x i8], [8 x i8]* @arr2, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr3, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr4, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr5, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr6, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @arr7, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr8, i32 0, i32 0), i32 %n, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* getelementptr inbounds ([128 x i8], [128 x i8]* @arr9, i32 0, i32 0), i32 %n, i1 false) unreachable } @@ -428,6 +428,6 @@ ; CHECK-NOT: arr7: -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind Index: test/CodeGen/ARM/memset-inline.ll =================================================================== --- test/CodeGen/ARM/memset-inline.ll +++ test/CodeGen/ARM/memset-inline.ll @@ -12,7 +12,7 @@ ; CHECK-6M: str r1, [r0] ; CHECK-6M: str r1, [r0, #4] ; CHECK-6M: str r1, [r0, #8] - call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) + call 
void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 12, i1 false) ret void } @@ -33,7 +33,7 @@ ; CHECK-6M: str [[REG]], [sp] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 - call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i1 false) call void @something(i8* %0) nounwind ret void } @@ -54,7 +54,7 @@ for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] %0 = trunc i32 %i to i8 - call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i1 false) call void @something(i8* %p) %inc = add nuw nsw i32 %i, 1 %exitcond = icmp eq i32 %inc, 255 @@ -78,7 +78,7 @@ for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] %0 = trunc i32 %i to i8 - call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false) + call void @llvm.memset.p0i8.i32(i8* align 2 %p, i8 %0, i32 4, i1 false) call void @something(i8* %p) %inc = add nuw nsw i32 %i, 1 %exitcond = icmp eq i32 %inc, 255 @@ -89,5 +89,5 @@ } declare void @something(i8*) nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind Index: test/CodeGen/ARM/stack-protector-bmovpcb_call.ll =================================================================== --- test/CodeGen/ARM/stack-protector-bmovpcb_call.ll +++ test/CodeGen/ARM/stack-protector-bmovpcb_call.ll @@ -15,13 +15,13 @@ entry: %title = alloca [15 x i8], align 1 %0 = getelementptr inbounds [15 x i8], [15 x i8]* %title, i32 0, i32 0 - call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i32 1, i1 false) + call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %0, i8* align 1 getelementptr inbounds ([15 x i8], [15 x i8]* @main.title, i32 0, i32 0), i32 15, i1 false) %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i32 0, i32 0), i8* %0) #3 ret i32 0 } ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #1 ; Function Attrs: nounwind optsize declare i32 @printf(i8* nocapture readonly, ...) #2 Index: test/CodeGen/ARM/struct-byval-frame-index.ll =================================================================== --- test/CodeGen/ARM/struct-byval-frame-index.ll +++ test/CodeGen/ARM/struct-byval-frame-index.ll @@ -60,10 +60,10 @@ @brefframe = external global [4 x [4 x i8]], align 1 ; Function Attrs: nounwind -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) #0 ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 ; Function Attrs: nounwind declare void @SetMotionVectorsMB(%structK* nocapture, i32) #1 @@ -122,10 +122,10 @@ unreachable if.end230: ; preds = %if.end164 - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* bitcast ([4 x i32]* @b8mode to i8*), i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 undef, i8* align 4 bitcast ([4 x i32]* @b8mode to i8*), i32 16, i1 false) %b8pdir = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15 %3 = bitcast [4 x i32]* %b8pdir to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i32 4, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 bitcast ([4 x i32]* @b8pdir to i8*), i32 16, i1 false) br 
i1 undef, label %if.end236, label %if.then233 if.then233: ; preds = %if.end230 Index: test/CodeGen/ARM/tailcall-mem-intrinsics.ll =================================================================== --- test/CodeGen/ARM/tailcall-mem-intrinsics.ll +++ test/CodeGen/ARM/tailcall-mem-intrinsics.ll @@ -4,7 +4,7 @@ ; CHECK: bl __aeabi_memcpy define i8* @tail_memcpy_ret(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret i8* %p } @@ -12,7 +12,7 @@ ; CHECK: bl __aeabi_memmove define i8* @tail_memmove_ret(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { entry: - tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i1 false) ret i8* %p } @@ -20,12 +20,12 @@ ; CHECK: bl __aeabi_memset define i8* @tail_memset_ret(i8* nocapture %p, i8 %c, i32 %n) #0 { entry: - tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i1 false) ret i8* %p } -declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 -declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) #0 attributes #0 = { nounwind } Index: test/CodeGen/AVR/std-ldd-immediate-overflow.ll =================================================================== --- test/CodeGen/AVR/std-ldd-immediate-overflow.ll +++ test/CodeGen/AVR/std-ldd-immediate-overflow.ll @@ -8,11 +8,11 @@ 
store i32 0, i32 *%1 %2 = bitcast [4 x i8]* %dst to i8* %3 = bitcast [4 x i8]* %src to i8* - call void @llvm.memcpy.p0i8.p0i8.i16(i8* %2, i8* %3, i16 4, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i16(i8* %2, i8* %3, i16 4, i1 false) ; CHECK-NOT: std {{[XYZ]}}+64, {{r[0-9]+}} ; CHECK-NOT: ldd {{r[0-9]+}}, {{[XYZ]}}+64 ret i32 0 } -declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i1) Index: test/CodeGen/BPF/byval.ll =================================================================== --- test/CodeGen/BPF/byval.ll +++ test/CodeGen/BPF/byval.ll @@ -16,7 +16,7 @@ store i32 3, i32* %arrayinit.element2, align 8 %arrayinit.start = getelementptr inbounds %struct.S, %struct.S* %.compoundliteral, i64 0, i32 0, i64 3 %scevgep4 = bitcast i32* %arrayinit.start to i8* - call void @llvm.memset.p0i8.i64(i8* %scevgep4, i8 0, i64 28, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* align 4 %scevgep4, i8 0, i64 28, i1 false) call void @foo(i32 %a, %struct.S* byval align 8 %.compoundliteral) #3 ret void } @@ -24,4 +24,4 @@ declare void @foo(i32, %struct.S* byval align 8) #1 ; Function Attrs: nounwind -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3 +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) #3 Index: test/CodeGen/BPF/ex1.ll =================================================================== --- test/CodeGen/BPF/ex1.ll +++ test/CodeGen/BPF/ex1.ll @@ -12,7 +12,7 @@ %devname = alloca [3 x i8], align 1 %fmt = alloca [15 x i8], align 1 %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 
3, i1 false) %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0 %3 = load i64, i64* %2, align 8 %4 = inttoptr i64 %3 to %struct.sk_buff* @@ -25,7 +25,7 @@ ;