diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3218,6 +3218,26 @@ Note that this intrinsic cannot yet be called in a ``constexpr`` context. +Guaranteed inlined memset +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: c + + void __builtin_memset_inline(void *dst, int value, size_t size); + + +``__builtin_memset_inline`` has been designed as a building block for efficient +``memset`` implementations. It is identical to ``__builtin_memset`` but also +guarantees not to call any external functions. See LLVM IR `llvm.memset.inline +<https://llvm.org/docs/LangRef.html#llvm-memset-inline-intrinsic>`_ intrinsic +for more information. + +This is useful to implement a custom version of ``memset``, implement a +``libc`` memset or work around the absence of a ``libc``. + +Note that the ``size`` argument must be a compile-time constant. + +Note that this intrinsic cannot yet be called in a ``constexpr`` context. Atomic Min/Max builtins with memory ordering -------------------------------------------- diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -555,6 +555,7 @@ BUILTIN(__builtin_memmove, "v*v*vC*z", "nF") BUILTIN(__builtin_mempcpy, "v*v*vC*z", "nF") BUILTIN(__builtin_memset, "v*v*iz", "nF") +BUILTIN(__builtin_memset_inline, "vv*iIz", "nt") BUILTIN(__builtin_printf, "icC*.", "Fp:0:") BUILTIN(__builtin_stpcpy, "c*c*cC*", "nF") BUILTIN(__builtin_stpncpy, "c*c*cC*z", "nF") diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -32,6 +32,7 @@ void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, llvm::BasicBlock::iterator InsertPt) const override; + private: CodeGenFunction *CGF = nullptr; }; @@ -45,17 +46,18 @@ /// Storing a reference to the type cache here makes it a lot 
easier /// to build natural-feeling, target-specific IR. const CodeGenTypeCache &TypeCache; + public: CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::LLVMContext &C) - : CGBuilderBaseTy(C), TypeCache(TypeCache) {} - CGBuilderTy(const CodeGenTypeCache &TypeCache, - llvm::LLVMContext &C, const llvm::ConstantFolder &F, + : CGBuilderBaseTy(C), TypeCache(TypeCache) {} + CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::LLVMContext &C, + const llvm::ConstantFolder &F, const CGBuilderInserterTy &Inserter) - : CGBuilderBaseTy(C, F, Inserter), TypeCache(TypeCache) {} + : CGBuilderBaseTy(C, F, Inserter), TypeCache(TypeCache) {} CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::Instruction *I) - : CGBuilderBaseTy(I), TypeCache(TypeCache) {} + : CGBuilderBaseTy(I), TypeCache(TypeCache) {} CGBuilderTy(const CodeGenTypeCache &TypeCache, llvm::BasicBlock *BB) - : CGBuilderBaseTy(BB), TypeCache(TypeCache) {} + : CGBuilderBaseTy(BB), TypeCache(TypeCache) {} llvm::ConstantInt *getSize(CharUnits N) { return llvm::ConstantInt::get(TypeCache.SizeTy, N.getQuantity()); @@ -102,7 +104,8 @@ using CGBuilderBaseTy::CreateAlignedStore; llvm::StoreInst *CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, - CharUnits Align, bool IsVolatile = false) { + CharUnits Align, + bool IsVolatile = false) { return CreateAlignedStore(Val, Addr, Align.getAsAlign(), IsVolatile); } @@ -165,8 +168,8 @@ Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name = "") { auto *PtrTy = Ty->getPointerTo(Addr.getAddressSpace()); - return Address(CreateBitCast(Addr.getPointer(), PtrTy, Name), - Ty, Addr.getAlignment()); + return Address(CreateBitCast(Addr.getPointer(), PtrTy, Name), Ty, + Addr.getAlignment()); } using CGBuilderBaseTy::CreatePointerBitCastOrAddrSpaceCast; @@ -193,10 +196,10 @@ const llvm::StructLayout *Layout = DL.getStructLayout(ElTy); auto Offset = CharUnits::fromQuantity(Layout->getElementOffset(Index)); - return Address(CreateStructGEP(Addr.getElementType(), 
- Addr.getPointer(), Index, Name), - ElTy->getElementType(Index), - Addr.getAlignment().alignmentAtOffset(Offset)); + return Address( + CreateStructGEP(Addr.getElementType(), Addr.getPointer(), Index, Name), + ElTy->getElementType(Index), + Addr.getAlignment().alignmentAtOffset(Offset)); } /// Given @@ -264,10 +267,10 @@ CharUnits EltSize = CharUnits::fromQuantity(DL.getTypeAllocSize(Addr.getElementType())); - return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), Index, - Name), - Addr.getElementType(), - Addr.getAlignment().alignmentOfArrayElement(EltSize)); + return Address( + CreateGEP(Addr.getElementType(), Addr.getPointer(), Index, Name), + Addr.getElementType(), + Addr.getAlignment().alignmentOfArrayElement(EltSize)); } /// Given a pointer to i8, adjust it by a given constant offset. @@ -341,9 +344,16 @@ Dest.getAlignment().getAsAlign(), IsVolatile); } + using CGBuilderBaseTy::CreateMemSetInline; + llvm::CallInst *CreateMemSetInline(Address Dest, llvm::Value *Value, + uint64_t Size) { + return CreateMemSetInline(Dest.getPointer(), + Dest.getAlignment().getAsAlign(), Value, + getInt64(Size)); + } + using CGBuilderBaseTy::CreatePreserveStructAccessIndex; - Address CreatePreserveStructAccessIndex(Address Addr, - unsigned Index, + Address CreatePreserveStructAccessIndex(Address Addr, unsigned Index, unsigned FieldIndex, llvm::MDNode *DbgInfo) { llvm::StructType *ElTy = cast(Addr.getElementType()); @@ -363,7 +373,7 @@ } }; -} // end namespace CodeGen -} // end namespace clang +} // end namespace CodeGen +} // end namespace clang #endif diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3508,6 +3508,17 @@ Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); return RValue::get(Dest.getPointer()); } + case Builtin::BI__builtin_memset_inline: { + Address Dest = EmitPointerWithAlignment(E->getArg(0)); + Value *ByteVal = + 
Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); + uint64_t Size = + E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); + EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), + E->getArg(0)->getExprLoc(), FD, 0); + Builder.CreateMemSetInline(Dest, ByteVal, Size); + return RValue::get(nullptr); + } case Builtin::BI__builtin___memset_chk: { // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. Expr::EvalResult SizeResult, DstSizeResult; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2290,6 +2290,30 @@ } break; } + case Builtin::BI__builtin_memset_inline: { + if (checkArgCount(*this, TheCall, 3)) + return ExprError(); + auto ArgArrayConversionFailed = [&](unsigned Arg) { + ExprResult ArgExpr = + DefaultFunctionArrayLvalueConversion(TheCall->getArg(Arg)); + if (ArgExpr.isInvalid()) + return true; + TheCall->setArg(Arg, ArgExpr.get()); + return false; + }; + + if (ArgArrayConversionFailed(0)) + return true; + clang::Expr *SizeOp = TheCall->getArg(2); + // We warn about setting to `nullptr` pointers when `size` is + // greater than 0. When `size` is value dependent we cannot evaluate its + // value so we bail out. 
+ if (SizeOp->isValueDependent()) + break; + if (!SizeOp->EvaluateKnownConstInt(Context).isZero()) + CheckNonNullArgument(*this, TheCall->getArg(0), TheCall->getExprLoc()); + break; + } #define BUILTIN(ID, TYPE, ATTRS) #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \ case Builtin::BI##ID: \ diff --git a/clang/test/CodeGen/builtins-memset-inline.c b/clang/test/CodeGen/builtins-memset-inline.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtins-memset-inline.c @@ -0,0 +1,21 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s + +// CHECK-LABEL: define{{.*}} void @test_memset_inline_0(i8* noundef %dst, i8 noundef signext %value) +void test_memset_inline_0(void *dst, char value) { + // CHECK: call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %1, i64 0, i1 false) + __builtin_memset_inline(dst, value, 0); +} + +// CHECK-LABEL: define{{.*}} void @test_memset_inline_1(i8* noundef %dst, i8 noundef signext %value) +void test_memset_inline_1(void *dst, char value) { + // CHECK: call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %1, i64 1, i1 false) + __builtin_memset_inline(dst, value, 1); +} + +// CHECK-LABEL: define{{.*}} void @test_memset_inline_4(i8* noundef %dst, i8 noundef signext %value) +void test_memset_inline_4(void *dst, char value) { + // CHECK: call void @llvm.memset.inline.p0i8.i64(i8* align 1 %0, i8 %1, i64 4, i1 false) + __builtin_memset_inline(dst, value, 4); +} diff --git a/clang/test/Sema/builtins-memset-inline.cpp b/clang/test/Sema/builtins-memset-inline.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Sema/builtins-memset-inline.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +#define NULL ((char *)0) + +#if __has_builtin(__builtin_memset_inline) +#warning defined as expected +// expected-warning@-1 {{defined as expected}} +#endif + +void 
test_memset_inline_null_dst(void *ptr) { + __builtin_memset_inline(NULL, 1, 4); // expected-warning {{null passed to a callee that requires a non-null argument}} +} + +void test_memset_inline_null_buffer_is_ok_if_size_is_zero(void *ptr, char value) { + __builtin_memset_inline(NULL, value, /*size */ 0); +} + +void test_memset_inline_non_constant_size(void *dst, char value, unsigned size) { + __builtin_memset_inline(dst, value, size); // expected-error {{argument to '__builtin_memset_inline' must be a constant integer}} +} + +template +void test_memset_inline_template(void *dst, char value) { + // we do not try to evaluate size in non-instantiated templates. + __builtin_memset_inline(dst, value, size); +} + +void test_memset_inline_implicit_conversion(void *ptr, char value) { + char a[5]; + __builtin_memset_inline(a, value, 5); +} + +void test_memset_inline_num_args(void *dst, char value) { + __builtin_memset_inline(); // expected-error {{too few arguments to function call}} + __builtin_memset_inline(dst, value, 4, NULL); // expected-error {{too many arguments to function call}} +} diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -13867,6 +13867,71 @@ If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the behavior is undefined. +.. _int_memset_inline: + +'``llvm.memset.inline``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.memset.inline`` on any +integer bit width and for different address spaces. Not all targets +support all bit widths however. + +:: + + declare void @llvm.memset.inline.p0i8.i32(i8* <dest>, i8 <val>, + i32 <len>, + i1 <isvolatile>) + declare void @llvm.memset.inline.p0i8.i64(i8* <dest>, i8 <val>, + i64 <len>, + i1 <isvolatile>) + +Overview: +""""""""" + +The '``llvm.memset.inline.*``' intrinsics fill a block of memory with a +particular byte value and guarantee that no external functions are called. 
+ +Note that, unlike the standard libc function, the ``llvm.memset.inline.*`` +intrinsics do not return a value, take an extra ``isvolatile`` +argument and the pointer can be in a specified address space. + +Arguments: +"""""""""" + +The first argument is a pointer to the destination to fill, the second +is the byte value with which to fill it, the third argument is a constant +integer argument specifying the number of bytes to fill, and the fourth +is a boolean indicating a volatile access. + +The :ref:`align <attr_align>` parameter attribute can be provided +for the first argument. + +If the ``isvolatile`` parameter is ``true``, the ``llvm.memset.inline`` call is +a :ref:`volatile operation <volatile>`. The detailed access behavior is not +very cleanly specified and it is unwise to depend on it. + +Semantics: +"""""""""" + +The '``llvm.memset.inline.*``' intrinsics fill "len" bytes of memory starting +at the destination location. If the argument is known to be +aligned to some boundary, this can be specified as an attribute on +the argument. + +``len`` must be a constant expression. +If ``<len>`` is 0, it is a no-op modulo the behavior of attributes attached to +the arguments. +If ``<len>`` is not a well-defined value, the behavior is undefined. +If ``<len>`` is not zero, ``<dest>`` should be well-defined, otherwise the +behavior is undefined. + +The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of +'``llvm.memset.*``', but the generated code is guaranteed not to call any +external functions. 
+ '``llvm.sqrt.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1052,7 +1052,8 @@ const AAMDNodes &AAInfo = AAMDNodes()); SDValue getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, - SDValue Size, Align Alignment, bool isVol, bool isTailCall, + SDValue Size, Align Alignment, bool isVol, + bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo = AAMDNodes()); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -578,6 +578,12 @@ MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + CallInst *CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, Value *Val, + Value *Size, bool IsVolatile = false, + MDNode *TBAATag = nullptr, + MDNode *ScopeTag = nullptr, + MDNode *NoAliasTag = nullptr); + /// Create and insert an element unordered-atomic memset of the region of /// memory starting at the given pointer to the given value. 
/// diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -978,6 +978,7 @@ case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: + case Intrinsic::memset_inline: case Intrinsic::memcpy_inline: return true; default: @@ -994,7 +995,28 @@ public: // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const IntrinsicInst *I) { - return I->getIntrinsicID() == Intrinsic::memset; + switch (I->getIntrinsicID()) { + case Intrinsic::memset: + case Intrinsic::memset_inline: + return true; + default: + return false; + } + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +/// This class wraps the llvm.memset.inline intrinsic. +class MemSetInlineInst : public MemSetInst { +public: + ConstantInt *getLength() const { + return cast(MemSetInst::getLength()); + } + // Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const IntrinsicInst *I) { + return I->getIntrinsicID() == Intrinsic::memset_inline; } static bool classof(const Value *V) { return isa(V) && classof(cast(V)); @@ -1079,6 +1101,7 @@ case Intrinsic::memcpy_inline: case Intrinsic::memmove: case Intrinsic::memset: + case Intrinsic::memset_inline: case Intrinsic::memcpy_element_unordered_atomic: case Intrinsic::memmove_element_unordered_atomic: case Intrinsic::memset_element_unordered_atomic: @@ -1100,6 +1123,7 @@ static bool classof(const IntrinsicInst *I) { switch (I->getIntrinsicID()) { case Intrinsic::memset: + case Intrinsic::memset_inline: case Intrinsic::memset_element_unordered_atomic: return true; default: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -651,6 +651,17 @@ NoCapture>, WriteOnly>, ImmArg>]>; +// Memset semantic that is guaranteed to 
be inlined. +// In particular this means that the generated code is not allowed to call any +// external function. +// The third argument (specifying the size) must be a constant. +def int_memset_inline + : Intrinsic<[], + [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i1_ty], + [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, + NoCapture>, WriteOnly>, + ImmArg>, ImmArg>]>; + // FIXME: Add version of these floating point intrinsics which allow non-default // rounding modes and FP exception handling. diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp --- a/llvm/lib/Analysis/Lint.cpp +++ b/llvm/lib/Analysis/Lint.cpp @@ -335,6 +335,12 @@ MSI->getDestAlign(), nullptr, MemRef::Write); break; } + case Intrinsic::memset_inline: { + MemSetInlineInst *MSII = cast(&I); + visitMemoryReference(I, MemoryLocation::getForDest(MSII), + MSII->getDestAlign(), nullptr, MemRef::Write); + break; + } case Intrinsic::vastart: Check(I.getParent()->getParent()->isVarArg(), diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6990,7 +6990,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, Align Alignment, bool isVol, - MachinePointerInfo DstPtrInfo, + bool AlwaysInline, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo) { // Turn a memset of undef to nop. // FIXME: We need to honor volatile even is Src is undef. @@ -7010,8 +7010,10 @@ DstAlignCanChange = true; bool IsZeroVal = isa(Src) && cast(Src)->isZero(); + unsigned Limit = AlwaysInline ? 
~0 : TLI.getMaxStoresPerMemset(OptSize); + if (!TLI.findOptimalMemOpLowering( - MemOps, TLI.getMaxStoresPerMemset(OptSize), + MemOps, Limit, MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) return SDValue(); @@ -7307,7 +7309,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, - bool isVol, bool isTailCall, + bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, const AAMDNodes &AAInfo) { // Check to see if we should lower the memset to stores first. @@ -7320,7 +7322,7 @@ SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, DstPtrInfo, AAInfo); + isVol, false, DstPtrInfo, AAInfo); if (Result.getNode()) return Result; @@ -7335,6 +7337,15 @@ return Result; } + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemsetStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Alignment, isVol, true, + DstPtrInfo, AAInfo); + } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); // Emit a library call. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5909,10 +5909,30 @@ bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); - SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, + SDValue MS = DAG.getMemset( + Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, + isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); + updateDAGForMaybeTailCall(MS); + return; + } + case Intrinsic::memset_inline: { + const auto &MSII = cast(I); + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Value = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + assert(isa(Size) && "memset_inline needs constant size"); + // @llvm.memset defines 0 and 1 to both mean no alignment. + Align DstAlign = MSII.getDestAlign().valueOrOne(); + bool isVol = MSII.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. + SDValue Root = isVol ? getRoot() : getMemoryRoot(); + SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata()); - updateDAGForMaybeTailCall(MS); + updateDAGForMaybeTailCall(MC); return; } case Intrinsic::memmove: { diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -164,6 +164,35 @@ return CI; } +CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign, + Value *Val, Value *Size, + bool IsVolatile, MDNode *TBAATag, + MDNode *ScopeTag, + MDNode *NoAliasTag) { + Dst = getCastedInt8PtrValue(Dst); + Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)}; + Type *Tys[] = {Dst->getType(), Size->getType()}; + Module *M = BB->getParent()->getParent(); + Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset_inline, Tys); + + CallInst *CI = createCallHelper(TheFn, Ops, this); + + if (DstAlign) + cast(CI)->setDestAlignment(DstAlign->value()); + + // Set the TBAA info if 
present. + if (TBAATag) + CI->setMetadata(LLVMContext::MD_tbaa, TBAATag); + + if (ScopeTag) + CI->setMetadata(LLVMContext::MD_alias_scope, ScopeTag); + + if (NoAliasTag) + CI->setMetadata(LLVMContext::MD_noalias, NoAliasTag); + + return CI; +} + CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet( Value *Ptr, Value *Val, Value *Size, Align Alignment, uint32_t ElementSize, MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) { diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4917,7 +4917,8 @@ case Intrinsic::memcpy: case Intrinsic::memcpy_inline: case Intrinsic::memmove: - case Intrinsic::memset: { + case Intrinsic::memset: + case Intrinsic::memset_inline: { const auto *MI = cast(&Call); auto IsValidAlignment = [&](unsigned Alignment) -> bool { return Alignment == 0 || isPowerOf2_32(Alignment); diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -175,7 +175,8 @@ DAG.getNode(ISD::ADD, dl, AddrVT, Dst, DAG.getConstant(Offset, dl, AddrVT)), Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, - isVolatile, false, DstPtrInfo.getWithOffset(Offset)); + isVolatile, /* AlwaysInline */ false, + /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. 
diff --git a/llvm/test/CodeGen/X86/memset-inline.ll b/llvm/test/CodeGen/X86/memset-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/memset-inline.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind +declare void @llvm.memset.inline.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind + +define void @test1(i8* %a, i8 %value) nounwind { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: movzbl %sil, %eax +; CHECK-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101 +; CHECK-NEXT: imulq %rax, %rcx +; CHECK-NEXT: movq %rcx, (%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 8, i1 0) + ret void +} + +define void @regular_memset_calls_external_function(i8* %a, i8 %value) nounwind { +; CHECK-LABEL: regular_memset_calls_external_function: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $129, %edx +; CHECK-NEXT: jmp memset@PLT # TAILCALL + tail call void @llvm.memset.p0i8.i64(i8* %a, i8 %value, i64 129, i1 0) + ret void +} + +define void @inlined_set_doesnt_call_external_function(i8* %a, i8 %value) nounwind { +; CHECK-LABEL: inlined_set_doesnt_call_external_function: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: movzbl %sil, %eax +; CHECK-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101 +; CHECK-NEXT: imulq %rax, %rcx +; CHECK-NEXT: movq %rcx, 120(%rdi) +; CHECK-NEXT: movq %rcx, 112(%rdi) +; CHECK-NEXT: movq %rcx, 104(%rdi) +; CHECK-NEXT: movq %rcx, 96(%rdi) +; CHECK-NEXT: movq %rcx, 88(%rdi) +; CHECK-NEXT: movq %rcx, 80(%rdi) +; CHECK-NEXT: movq %rcx, 72(%rdi) +; CHECK-NEXT: movq %rcx, 64(%rdi) +; CHECK-NEXT: movq %rcx, 56(%rdi) +; CHECK-NEXT: movq %rcx, 48(%rdi) +; CHECK-NEXT: movq %rcx, 40(%rdi) 
+; CHECK-NEXT: movq %rcx, 32(%rdi) +; CHECK-NEXT: movq %rcx, 24(%rdi) +; CHECK-NEXT: movq %rcx, 16(%rdi) +; CHECK-NEXT: movq %rcx, 8(%rdi) +; CHECK-NEXT: movq %rcx, (%rdi) +; CHECK-NEXT: movb %cl, 128(%rdi) +; CHECK-NEXT: retq + tail call void @llvm.memset.inline.p0i8.i64(i8* %a, i8 %value, i64 129, i1 0) + ret void +} diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll --- a/llvm/test/Other/lint.ll +++ b/llvm/test/Other/lint.ll @@ -6,6 +6,8 @@ declare void @llvm.stackrestore(i8*) declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind +declare void @llvm.memset.p0i8.i8.i64(i8* nocapture, i8, i64, i1) nounwind +declare void @llvm.memset.inline.p0i8.i8.i64(i8* nocapture, i8, i64, i1) nounwind declare void @has_sret(i8* sret(i8) %p) declare void @has_noaliases(i32* noalias %p, i32* %q) declare void @one_arg(i32) @@ -87,6 +89,11 @@ ; CHECK: Unusual: noalias argument aliases another argument call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i1 0) +; CHECK: Write to read-only memory +call void @llvm.memset.p0i8.i8.i64(i8* bitcast (i32* @CG to i8*), i8 1, i64 1, i1 0) +; CHECK: Write to read-only memory +call void @llvm.memset.inline.p0i8.i8.i64(i8* bitcast (i32* @CG to i8*), i8 1, i64 1, i1 0) + ; CHECK: Undefined behavior: Buffer overflow %wider = bitcast i8* %buf to i16* store i16 0, i16* %wider diff --git a/llvm/test/Verifier/intrinsic-immarg.ll b/llvm/test/Verifier/intrinsic-immarg.ll --- a/llvm/test/Verifier/intrinsic-immarg.ll +++ b/llvm/test/Verifier/intrinsic-immarg.ll @@ -62,6 +62,23 @@ ret void } +declare void @llvm.memset.inline.p0i8.i32(i8* nocapture, i8, i32, i1) +define void @memset_inline_is_volatile(i8* %dest, i8 %value, i1 %is.volatile) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i1 %is.volatile + ; CHECK-NEXT: call void 
@llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 8, i1 %is.volatile) + call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 8, i1 %is.volatile) + ret void +} + +define void @memset_inline_variable_size(i8* %dest, i8 %value, i32 %size) { + ; CHECK: immarg operand has non-immediate parameter + ; CHECK-NEXT: i32 %size + ; CHECK-NEXT: call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 %size, i1 true) + call void @llvm.memset.inline.p0i8.i32(i8* %dest, i8 %value, i32 %size, i1 true) + ret void +} + declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1) define void @objectsize(i8* %ptr, i1 %a, i1 %b, i1 %c) { diff --git a/llvm/test/Verifier/memset-inline.ll b/llvm/test/Verifier/memset-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Verifier/memset-inline.ll @@ -0,0 +1,9 @@ +; RUN: not opt -verify < %s 2>&1 | FileCheck %s + +; CHECK: alignment is not a power of two + +define void @foo(i8* %P, i8 %value) { + call void @llvm.memset.inline.p0i8.i32(i8* align 3 %P, i8 %value, i32 4, i1 false) + ret void +} +declare void @llvm.memset.inline.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind