Index: llvm/trunk/docs/LangRef.rst
===================================================================
--- llvm/trunk/docs/LangRef.rst
+++ llvm/trunk/docs/LangRef.rst
@@ -10339,6 +10339,8 @@
 aligned to some boundary, this can be specified as the fourth argument,
 otherwise it should be set to 0 or 1 (both meaning no alignment).
 
+.. _int_memset:
+
 '``llvm.memset.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -14258,3 +14260,76 @@
 actual element size.
 
 The optimizer is allowed to inline the memory copy when it's profitable to do so.
+
+.. _int_memset_element_unordered_atomic:
+
+'``llvm.memset.element.unordered.atomic``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memset.element.unordered.atomic`` on
+any integer bit width and for different address spaces. Not all targets
+support all bit widths, however.
+
+::
+
+      declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* <dest>,
+                                                                  i8 <value>,
+                                                                  i32 <len>,
+                                                                  i32 <element_size>)
+      declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* <dest>,
+                                                                  i8 <value>,
+                                                                  i64 <len>,
+                                                                  i32 <element_size>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset.element.unordered.atomic.*``' intrinsic is a specialization of the
+'``llvm.memset.*``' intrinsic. It differs in that the ``dest`` is treated as an array
+with elements that are exactly ``element_size`` bytes, and the assignment to that array
+uses a sequence of :ref:`unordered atomic <ordering>` store operations
+that are a positive integer multiple of the ``element_size`` in size.
+
+Arguments:
+""""""""""
+
+The first three arguments are the same as they are in the :ref:`@llvm.memset <int_memset>`
+intrinsic, with the added constraint that ``len`` is required to be a positive integer
+multiple of the ``element_size``. If ``len`` is not a positive integer multiple of
+``element_size``, then the behaviour of the intrinsic is undefined.
+
+``element_size`` must be a compile-time constant positive power of two no greater than
+the target-specific atomic access size limit.
+
+The ``dest`` input pointer must have the ``align`` parameter attribute specified. It
+must be a power of two no less than the ``element_size``. The caller guarantees that
+the destination pointer is aligned to that boundary.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset.element.unordered.atomic.*``' intrinsic sets the ``len`` bytes of
+memory starting at the destination location to the given ``value``. The memory is
+set with a sequence of store operations where each access is guaranteed to be a
+multiple of ``element_size`` bytes wide and aligned at an ``element_size`` boundary.
+
+The order of the assignment is unspecified. Only one write is issued to the
+destination buffer per element. It is well defined to have concurrent reads and
+writes to the destination provided those reads and writes are unordered atomic
+when specified.
+
+This intrinsic does not provide any additional ordering guarantees over those
+provided by a set of unordered stores to the destination.
+
+Lowering:
+"""""""""
+
+In the most general case, a call to the '``llvm.memset.element.unordered.atomic.*``'
+intrinsic is lowered to a call to the symbol ``__llvm_memset_element_unordered_atomic_*``,
+where '*' is replaced with the actual element size.
+
+The optimizer is allowed to inline the memory assignment when it's profitable to do so.
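As a concrete illustration of the syntax above, a frontend could materialize a call
to the new intrinsic through the C++ API. This is a minimal sketch, assuming an
``i8*`` destination known to be 4-byte aligned; ``emitAtomicMemSet4`` is a
hypothetical helper, and the exact ``Attribute``/``addParamAttr`` spellings vary
slightly across LLVM versions::

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Hypothetical helper: emit @llvm.memset.element.unordered.atomic with a
    // constant element size of 4. Dst is i8*, Len is i32 or i64.
    static CallInst *emitAtomicMemSet4(Module &M, IRBuilder<> &B, Value *Dst,
                                       Value *ByteVal, Value *Len) {
      // The intrinsic is overloaded on the pointer type and the length type.
      Function *Fn = Intrinsic::getDeclaration(
          &M, Intrinsic::memset_element_unordered_atomic,
          {Dst->getType(), Len->getType()});
      CallInst *CI = B.CreateCall(Fn, {Dst, ByteVal, Len, B.getInt32(4)});
      // The verifier requires an align attribute on dest >= element_size.
      CI->addParamAttr(0, Attribute::getWithAlignment(M.getContext(), 4));
      return CI;
    }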
+
Index: llvm/trunk/include/llvm/CodeGen/RuntimeLibcalls.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/RuntimeLibcalls.h
+++ llvm/trunk/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -346,6 +346,12 @@
   MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8,
   MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16,
+  MEMSET_ELEMENT_UNORDERED_ATOMIC_1,
+  MEMSET_ELEMENT_UNORDERED_ATOMIC_2,
+  MEMSET_ELEMENT_UNORDERED_ATOMIC_4,
+  MEMSET_ELEMENT_UNORDERED_ATOMIC_8,
+  MEMSET_ELEMENT_UNORDERED_ATOMIC_16,
+
   // EXCEPTION HANDLING
   UNWIND_RESUME,
 
@@ -526,6 +532,12 @@
   /// MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
   /// UNKNOWN_LIBCALL if there is none.
   Libcall getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
+
+  /// getMEMSET_ELEMENT_UNORDERED_ATOMIC - Return
+  /// MEMSET_ELEMENT_UNORDERED_ATOMIC_* value for the given element size or
+  /// UNKNOWN_LIBCALL if there is none.
+  Libcall getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize);
+
 }
 }
Index: llvm/trunk/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicInst.h
+++ llvm/trunk/include/llvm/IR/IntrinsicInst.h
@@ -385,6 +385,86 @@
     }
   };
 
+  /// This class represents the atomic memset intrinsic.
+  /// TODO: Integrate this class into the MemIntrinsic hierarchy; for now this
+  /// is a copy-and-paste of all the methods from that hierarchy.
+  class ElementUnorderedAtomicMemSetInst : public IntrinsicInst {
+  private:
+    enum { ARG_DEST = 0, ARG_VALUE = 1, ARG_LENGTH = 2, ARG_ELEMENTSIZE = 3 };
+
+  public:
+    Value *getRawDest() const {
+      return const_cast<Value *>(getArgOperand(ARG_DEST));
+    }
+    const Use &getRawDestUse() const { return getArgOperandUse(ARG_DEST); }
+    Use &getRawDestUse() { return getArgOperandUse(ARG_DEST); }
+
+    Value *getValue() const {
+      return const_cast<Value *>(getArgOperand(ARG_VALUE));
+    }
+    const Use &getValueUse() const { return getArgOperandUse(ARG_VALUE); }
+    Use &getValueUse() { return getArgOperandUse(ARG_VALUE); }
+
+    Value *getLength() const {
+      return const_cast<Value *>(getArgOperand(ARG_LENGTH));
+    }
+    const Use &getLengthUse() const { return getArgOperandUse(ARG_LENGTH); }
+    Use &getLengthUse() { return getArgOperandUse(ARG_LENGTH); }
+
+    bool isVolatile() const { return false; }
+
+    Value *getRawElementSizeInBytes() const {
+      return const_cast<Value *>(getArgOperand(ARG_ELEMENTSIZE));
+    }
+
+    ConstantInt *getElementSizeInBytesCst() const {
+      return cast<ConstantInt>(getRawElementSizeInBytes());
+    }
+
+    uint32_t getElementSizeInBytes() const {
+      return getElementSizeInBytesCst()->getZExtValue();
+    }
+
+    /// This is just like getRawDest, but it strips off any cast
+    /// instructions that feed it, giving the original input. The returned
+    /// value is guaranteed to be a pointer.
+    Value *getDest() const { return getRawDest()->stripPointerCasts(); }
+
+    unsigned getDestAddressSpace() const {
+      return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
+    }
+
+    /// Set the specified arguments of the instruction.
+    void setDest(Value *Ptr) {
+      assert(getRawDest()->getType() == Ptr->getType() &&
+             "setDest called with pointer of wrong type!");
+      setArgOperand(ARG_DEST, Ptr);
+    }
+
+    void setValue(Value *Val) {
+      assert(getValue()->getType() == Val->getType() &&
+             "setValue called with value of wrong type!");
+      setArgOperand(ARG_VALUE, Val);
+    }
+
+    void setLength(Value *L) {
+      assert(getLength()->getType() == L->getType() &&
+             "setLength called with value of wrong type!");
+      setArgOperand(ARG_LENGTH, L);
+    }
+
+    void setElementSizeInBytes(Constant *V) {
+      assert(V->getType() == Type::getInt32Ty(getContext()) &&
+             "setElementSizeInBytes called with value of wrong type!");
+      setArgOperand(ARG_ELEMENTSIZE, V);
+    }
+
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memset_element_unordered_atomic;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
   /// This is the common base class for memset/memcpy/memmove.
   class MemIntrinsic : public IntrinsicInst {
   public:
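A pass would typically recognize the intrinsic through the class above. This is a
minimal sketch, assuming some visited instruction ``I``; ``visitAtomicMemSet`` is a
hypothetical helper::

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // Sketch: match the new intrinsic and pull out its operands.
    static bool visitAtomicMemSet(Instruction &I) {
      auto *ASI = dyn_cast<ElementUnorderedAtomicMemSetInst>(&I);
      if (!ASI)
        return false;
      Value *Dst = ASI->getRawDest();   // destination pointer (i8*)
      Value *Fill = ASI->getValue();    // i8 fill value
      Value *Len = ASI->getLength();    // length in bytes (i32 or i64)
      uint32_t ElemSize = ASI->getElementSizeInBytes(); // constant element size
      return Dst && Fill && Len && ElemSize != 0;
    }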
Index: llvm/trunk/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td
+++ llvm/trunk/include/llvm/IR/Intrinsics.td
@@ -884,6 +884,10 @@
                        ReadOnly<1>
                      ]>;
 
+// @llvm.memset.element.unordered.atomic.*(dest, value, length, elementsize)
+def int_memset_element_unordered_atomic
+    : Intrinsic<[], [ llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty, llvm_i32_ty ],
+                [ IntrArgMemOnly, NoCapture<0>, WriteOnly<0> ]>;
 
 //===------------------------ Reduction Intrinsics ------------------------===//
 //
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5032,6 +5032,45 @@
     DAG.setRoot(CallResult.second);
     return nullptr;
   }
+  case Intrinsic::memset_element_unordered_atomic: {
+    auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+    SDValue Dst = getValue(MI.getRawDest());
+    SDValue Val = getValue(MI.getValue());
+    SDValue Length = getValue(MI.getLength());
+
+    // Emit a library call.
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry;
+    Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+    Entry.Node = Dst;
+    Args.push_back(Entry);
+
+    Entry.Ty = Type::getInt8Ty(*DAG.getContext());
+    Entry.Node = Val;
+    Args.push_back(Entry);
+
+    Entry.Ty = MI.getLength()->getType();
+    Entry.Node = Length;
+    Args.push_back(Entry);
+
+    uint64_t ElementSizeConstant = MI.getElementSizeInBytes();
+    RTLIB::Libcall LibraryCall =
+        RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant);
+    if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+      report_fatal_error("Unsupported element size");
+
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
+        TLI.getLibcallCallingConv(LibraryCall),
+        Type::getVoidTy(*DAG.getContext()),
+        DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall),
+                              TLI.getPointerTy(DAG.getDataLayout())),
+        std::move(Args));
+
+    std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+    DAG.setRoot(CallResult.second);
+    return nullptr;
+  }
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
     DILocalVariable *Variable = DI.getVariable();
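For the i32-length overload with element size 4, the lowering above therefore
produces an ordinary call whose effective C signature would look like the sketch
below; the parameter types mirror the ``ArgListEntry`` types chosen above, and the
exact runtime interface is an assumption rather than something defined by this
patch::

    // Assumed runtime interface: dest is passed as an intptr-sized value,
    // value as an 8-bit integer, and len as the i32 length operand.
    extern "C" void __llvm_memset_element_unordered_atomic_4(void *dest,
                                                             unsigned char value,
                                                             unsigned len);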
Index: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
@@ -394,6 +394,16 @@
       "__llvm_memmove_element_unordered_atomic_8";
   Names[RTLIB::MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16] =
       "__llvm_memmove_element_unordered_atomic_16";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_1] =
+      "__llvm_memset_element_unordered_atomic_1";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_2] =
+      "__llvm_memset_element_unordered_atomic_2";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_4] =
+      "__llvm_memset_element_unordered_atomic_4";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_8] =
+      "__llvm_memset_element_unordered_atomic_8";
+  Names[RTLIB::MEMSET_ELEMENT_UNORDERED_ATOMIC_16] =
+      "__llvm_memset_element_unordered_atomic_16";
   Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -830,6 +840,23 @@
   }
 }
 
+RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
+  switch (ElementSize) {
+  case 1:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
+  case 2:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
+  case 4:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
+  case 8:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
+  case 16:
+    return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
+  default:
+    return UNKNOWN_LIBCALL;
+  }
+}
+
 /// InitCmpLibcallCCs - Set default comparison libcall CC.
 ///
 static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
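Note that no implementation of the ``__llvm_memset_element_unordered_atomic_*``
symbols named above is provided anywhere; a runtime library has to supply them.
A minimal sketch of what the element-size-4 flavor could look like, assuming the
GCC/Clang ``__atomic_store_n`` builtin and that a C++ relaxed store is at least as
strong as LLVM's unordered ordering::

    #include <cstdint>
    #include <cstring>

    // Sketch of a possible runtime implementation for element size 4: one
    // relaxed atomic store per 4-byte element. len is the byte count and,
    // per the intrinsic's contract, must be a positive multiple of 4.
    extern "C" void __llvm_memset_element_unordered_atomic_4(uint32_t *dest,
                                                             uint8_t value,
                                                             uint32_t len) {
      uint32_t elem;
      std::memset(&elem, value, sizeof(elem)); // splat the fill byte
      for (uint32_t i = 0; i != len / sizeof(uint32_t); ++i)
        __atomic_store_n(&dest[i], elem, __ATOMIC_RELAXED);
    }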
Index: llvm/trunk/lib/IR/Verifier.cpp
===================================================================
--- llvm/trunk/lib/IR/Verifier.cpp
+++ llvm/trunk/lib/IR/Verifier.cpp
@@ -4077,7 +4077,38 @@
            "incorrect alignment of the destination argument", CS);
     Assert(IsValidAlignment(SrcAlignment),
            "incorrect alignment of the source argument", CS);
-
+    break;
+  }
+  case Intrinsic::memset_element_unordered_atomic: {
+    auto *MI = cast<ElementUnorderedAtomicMemSetInst>(CS.getInstruction());
+
+    ConstantInt *ElementSizeCI =
+        dyn_cast<ConstantInt>(MI->getRawElementSizeInBytes());
+    Assert(ElementSizeCI,
+           "element size of the element-wise unordered atomic memory "
+           "intrinsic must be a constant int",
+           CS);
+    const APInt &ElementSizeVal = ElementSizeCI->getValue();
+    Assert(ElementSizeVal.isPowerOf2(),
+           "element size of the element-wise atomic memory intrinsic "
+           "must be a power of 2",
+           CS);
+
+    if (auto *LengthCI = dyn_cast<ConstantInt>(MI->getLength())) {
+      uint64_t Length = LengthCI->getZExtValue();
+      uint64_t ElementSize = MI->getElementSizeInBytes();
+      Assert((Length % ElementSize) == 0,
+             "constant length must be a multiple of the element size in the "
+             "element-wise atomic memory intrinsic",
+             CS);
+    }
+
+    auto IsValidAlignment = [&](uint64_t Alignment) {
+      return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
+    };
+    uint64_t DstAlignment = CS.getParamAlignment(0);
+    Assert(IsValidAlignment(DstAlignment),
+           "incorrect alignment of the destination argument", CS);
     break;
   }
   case Intrinsic::gcroot:
Index: llvm/trunk/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
===================================================================
--- llvm/trunk/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ llvm/trunk/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -405,6 +405,12 @@
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ unsupported,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ unsupported,
+
 // EXCEPTION HANDLING
 /* UNWIND_RESUME */ unsupported,
 
@@ -854,6 +860,11 @@
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
 /* MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_1 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_2 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_4 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_8 */ nullptr,
+/* MEMSET_ELEMENT_UNORDERED_ATOMIC_16 */ nullptr,
 /* UNWIND_RESUME */ "_Unwind_Resume",
 /* SYNC_VAL_COMPARE_AND_SWAP_1 */ "__sync_val_compare_and_swap_1",
 /* SYNC_VAL_COMPARE_AND_SWAP_2 */ "__sync_val_compare_and_swap_2",
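The destination-alignment rule enforced by the Verifier change above boils down to
a small predicate; the sketch below restates it with a few worked cases
(``isValidDestAlign`` is a hypothetical standalone helper mirroring the
``IsValidAlignment`` lambda)::

    #include "llvm/Support/MathExtras.h"

    // The align parameter attribute on dest must be a power of two that is
    // at least element_size; a missing attribute reads as alignment 0 and
    // is rejected, since 0 is not a power of two.
    static bool isValidDestAlign(uint64_t Alignment, uint64_t ElementSize) {
      return llvm::isPowerOf2_64(Alignment) && Alignment >= ElementSize;
    }
    // isValidDestAlign(4, 4) -> true   (minimum legal alignment)
    // isValidDestAlign(8, 4) -> true   (over-alignment is allowed)
    // isValidDestAlign(1, 4) -> false  ("incorrect alignment of the destination")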
Index: llvm/trunk/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/element-wise-atomic-memory-intrinsics.ll
@@ -124,5 +124,66 @@
   call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %Dst, i8* align 4 %Src, i32 4, i32 4)
   ret void
 }
+
+define i8* @test_memset1(i8* %P, i8 %V) {
+  ; CHECK: test_memset1
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 1)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $1, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_1
+}
+
+define i8* @test_memset2(i8* %P, i8 %V) {
+  ; CHECK: test_memset2
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 2, i32 2)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $2, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_2
+}
+
+define i8* @test_memset4(i8* %P, i8 %V) {
+  ; CHECK: test_memset4
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 4, i32 4)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $4, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_4
+}
+
+define i8* @test_memset8(i8* %P, i8 %V) {
+  ; CHECK: test_memset8
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %P, i8 %V, i32 8, i32 8)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $8, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_8
+}
+
+define i8* @test_memset16(i8* %P, i8 %V) {
+  ; CHECK: test_memset16
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %P, i8 %V, i32 16, i32 16)
+  ret i8* %P
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $16, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_16
+}
+
+define void @test_memset_args(i8** %Storage, i8* %V) {
+  ; CHECK: test_memset_args
+  %Dst = load i8*, i8** %Storage
+  %Val = load i8, i8* %V
+
+  ; 1st arg (%rdi)
+  ; CHECK-DAG: movq (%rdi), %rdi
+  ; 2nd arg (%rsi)
+  ; CHECK-DAG: movzbl (%rsi), %esi
+  ; 3rd arg (%edx) -- length
+  ; CHECK-DAG: movl $4, %edx
+  ; CHECK: __llvm_memset_element_unordered_atomic_4
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %Dst, i8 %Val, i32 4, i32 4)
+  ret void
+}
+
 declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind
Index: llvm/trunk/test/Verifier/element-wise-atomic-memory-intrinsics.ll
===================================================================
--- llvm/trunk/test/Verifier/element-wise-atomic-memory-intrinsics.ll
+++ llvm/trunk/test/Verifier/element-wise-atomic-memory-intrinsics.ll
@@ -46,4 +46,22 @@
 }
 declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
 
+define void @test_memset(i8* %P, i8 %V, i32 %A, i32 %E) {
+  ; CHECK: element size of the element-wise unordered atomic memory intrinsic must be a constant int
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 %E)
+  ; CHECK: element size of the element-wise atomic memory intrinsic must be a power of 2
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 1, i32 3)
+
+  ; CHECK: constant length must be a multiple of the element size in the element-wise atomic memory intrinsic
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %P, i8 %V, i32 7, i32 4)
+
+  ; CHECK: incorrect alignment of the destination argument
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* %P, i8 %V, i32 1, i32 1)
+  ; CHECK: incorrect alignment of the destination argument
+  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %P, i8 %V, i32 4, i32 4)
+
+  ret void
+}
+declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture, i8, i32, i32) nounwind
+
 ; CHECK: input module is broken!
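Finally, the LangRef text above permits inlining the assignment instead of emitting
the libcall. A sketch of such an expansion for a constant length and element size 4
follows; ``expandAtomicMemSet4`` is a hypothetical helper (this patch itself only
emits the library call), and the ``CreateAlignedStore``/``CreateConstGEP1_64``
spellings vary slightly across LLVM versions::

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Hypothetical inline expansion: one unordered, 4-byte-aligned store per
    // element. LenBytes must be a constant positive multiple of 4.
    static void expandAtomicMemSet4(IRBuilder<> &B, Value *Dst, Value *ByteVal,
                                    uint64_t LenBytes) {
      Type *I32 = B.getInt32Ty();
      // Splat the i8 fill byte into one 4-byte element: zext(v) * 0x01010101.
      Value *Elem =
          B.CreateMul(B.CreateZExt(ByteVal, I32), B.getInt32(0x01010101));
      Value *Base = B.CreatePointerCast(Dst, I32->getPointerTo());
      for (uint64_t I = 0; I != LenBytes / 4; ++I) {
        Value *Ptr = B.CreateConstGEP1_64(Base, I);
        StoreInst *St = B.CreateAlignedStore(Elem, Ptr, 4);
        St->setAtomic(AtomicOrdering::Unordered); // one unordered store/element
      }
    }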