Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -5430,6 +5430,49 @@ -'``invariant.group``' Metadata +'``thread.private``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The ``thread.private`` metadata may be attached to any instruction which +directly accesses memory (load, store, cmpxchg, atomicrmw). The only legal +value is an empty metadata or the index of such an empty metadata node. + +If an instruction tagged with the metadata is executed, the optimizer may +assume the memory location referenced by the instruction is thread private +at all points in the program where the memory location is known to be +dereferenceable; otherwise, the behavior is undefined. + +A thread private memory location is one which can only be accessed by a +single thread. The behavior of a program which contains a concurrent +read or write of a location marked as thread private is undefined. Such +a program is erroneous and is assumed not to exist. + +Note that the optimizer can infer that some locations are thread +private (e.g. a non-escaping alloca or allocation). Annotating uses +of such locations is not required or recommended. + +Examples of transforms which are allowed only on thread private +locations include: + +* Inserting a store to a location which is not otherwise known to have + been stored to. + +* Spilling an unrelated value into the memory location during a period + in which the contents are known to be dead. + +* Removing stores to locations whose contents are not read again by + the writing thread. + +Note that thread local storage and thread private are distinct concepts. +Thread local storage simply ensures each thread has a copy of the value, +not that the copy is uniquely accessed by a single thread. In some +implementations, the address of a thread local location can be +communicated to another thread which can then read or write from it. + +.. 
code-block:: llvm + + %v = load i32, i32* @G, !thread.private !{} + +'``invariant.group``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + The experimental ``invariant.group`` metadata may be attached to ``load``/``store`` instructions referencing a single metadata with no entries. The existence of the ``invariant.group`` metadata on the instruction tells Index: include/llvm/IR/LLVMContext.h =================================================================== --- include/llvm/IR/LLVMContext.h +++ include/llvm/IR/LLVMContext.h @@ -102,6 +102,7 @@ MD_associated = 22, // "associated" MD_callees = 23, // "callees" MD_irr_loop = 24, // "irr_loop" + MD_thread_private = 25, // "thread.private" }; /// Known operand bundle tag IDs, which always have the same value. All Index: lib/IR/LLVMContext.cpp =================================================================== --- lib/IR/LLVMContext.cpp +++ lib/IR/LLVMContext.cpp @@ -61,6 +61,7 @@ {MD_associated, "associated"}, {MD_callees, "callees"}, {MD_irr_loop, "irr_loop"}, + {MD_thread_private, "thread.private"}, }; for (auto &MDKind : MDKinds) { Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -1382,6 +1382,9 @@ SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); + if (Load->getMetadata(LLVMContext::MD_thread_private)) + IsKnownThreadLocalObject = true; + if (!DereferenceableInPH) DereferenceableInPH = isSafeToExecuteUnconditionally( *Load, DT, CurLoop, SafetyInfo, ORE, Preheader->getTerminator()); @@ -1396,6 +1399,10 @@ SawUnorderedAtomic |= Store->isAtomic(); SawNotAtomic |= !Store->isAtomic(); + if (Store->getMetadata(LLVMContext::MD_thread_private)) + IsKnownThreadLocalObject = true; + + // If the store is guaranteed to execute, both properties are satisfied. 
// We may want to check if a store is guaranteed to execute even if we // already know that promotion is safe, since it may have higher Index: test/Transforms/LICM/promote-tls.ll =================================================================== --- test/Transforms/LICM/promote-tls.ll +++ test/Transforms/LICM/promote-tls.ll @@ -172,3 +172,96 @@ %split = phi i32* [ %addr, %for.body ] ret i32* null } + + +; Did the user tell us the location was thread private? (on the load; the CHECK lines expect the promoted store in both exit blocks) +; CHECK-LABEL: @test3 +define i32* @test3(i32 %n, i32* dereferenceable(8) %addr, i1 %taken) { +entry: + br label %for.header + +for.header: + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + br i1 %taken, label %for.body, label %early-exit + +early-exit: +; CHECK-LABEL: early-exit: +; CHECK: store i32 %new1.lcssa, i32* %addr, align 1 + ret i32* null + +for.body: + %old = load i32, i32* %addr, align 4, !thread.private !{} + %new = add i32 %old, 1 + store i32 %new, i32* %addr, align 4 + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body +; CHECK-LABEL: for.cond.for.end_crit_edge: +; CHECK: store i32 %new.lcssa, i32* %addr, align 1 + %split = phi i32* [ %addr, %for.body ] + ret i32* null +} + +; Did the user tell us the location was thread private? 
(on the store) +; CHECK-LABEL: @test4 +define i32* @test4(i32 %n, i32* dereferenceable(8) %addr, i1 %taken) { +entry: + br label %for.header + +for.header: + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + br i1 %taken, label %for.body, label %early-exit + +early-exit: +; CHECK-LABEL: early-exit: +; CHECK: store i32 %new1.lcssa, i32* %addr, align 1 + ret i32* null + +for.body: + %old = load i32, i32* %addr, align 4 + %new = add i32 %old, 1 + store i32 %new, i32* %addr, align 4, !thread.private !{} + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body +; CHECK-LABEL: for.cond.for.end_crit_edge: +; CHECK: store i32 %new.lcssa, i32* %addr, align 1 + %split = phi i32* [ %addr, %for.body ] + ret i32* null +} + +; Also thread.private, but the pointer is not known dereferenceable, so no store may be promoted +; CHECK-LABEL: @test_neg3 +define i32* @test_neg3(i32 %n, i32* %addr, i1 %taken) { +entry: + br label %for.header + +for.header: + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + br i1 %taken, label %for.body, label %early-exit + +early-exit: +; CHECK-LABEL: early-exit: +; CHECK-NOT: store i32 %new1.lcssa, i32* %addr, align 1 + ret i32* null + +for.body: +; CHECK-LABEL: for.body: +; CHECK: store i32 %new, i32* %addr, align 4 + %old = load i32, i32* %addr, align 4, !thread.private !{} + %new = add i32 %old, 1 + store i32 %new, i32* %addr, align 4 + %inc = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body +; CHECK-LABEL: for.cond.for.end_crit_edge: +; CHECK-NOT: store i32 %new.lcssa, i32* %addr, align 1 + %split = phi i32* [ %addr, %for.body ] + ret i32* null +}