Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -1926,6 +1926,13 @@
     function that has a ``"probe-stack"`` attribute is inlined into a
     function that has no ``"probe-stack"`` attribute at all, the resulting
     function has the ``"probe-stack"`` attribute of the callee.
+``noread_thread_id``
+    This attribute indicates that the function does not rely on the
+    identity of the current thread in any way, such as by reading the
+    current thread ID or taking the address of a thread-local variable.
+
+    If the function does rely on the identity of the current thread,
+    the behavior is undefined.
 ``readnone``
     On a function, this attribute indicates that the function computes its
     result (or decides to unwind an exception) based strictly on its arguments,
Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -690,6 +690,7 @@
   ATTR_KIND_PRESPLIT_COROUTINE = 83,
   ATTR_KIND_FNRETTHUNK_EXTERN = 84,
   ATTR_KIND_SKIP_PROFILE = 85,
+  ATTR_KIND_NO_READ_THREAD_ID = 86,
 };
 
 enum ComdatSelectionKindCodes {
Index: llvm/include/llvm/IR/Attributes.td
===================================================================
--- llvm/include/llvm/IR/Attributes.td
+++ llvm/include/llvm/IR/Attributes.td
@@ -214,6 +214,9 @@
 /// Similar to byval but without a copy.
 def Preallocated : TypeAttr<"preallocated", [FnAttr, ParamAttr]>;
 
+/// Function does not read thread ID.
+def NoReadThreadID : EnumAttr<"noread_thread_id", [FnAttr]>;
+
 /// Function does not access memory.
 def ReadNone : EnumAttr<"readnone", [FnAttr, ParamAttr]>;
 
Index: llvm/include/llvm/IR/InstrTypes.h
===================================================================
--- llvm/include/llvm/IR/InstrTypes.h
+++ llvm/include/llvm/IR/InstrTypes.h
@@ -1847,6 +1847,30 @@
   /// Return true if the call should not be inlined.
   bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
   void setIsNoInline() { addFnAttr(Attribute::NoInline); }
+
+  /// Determine if the call does not read thread ID.
+  bool doesNotReadThreadID() const {
+    return hasFnAttr(Attribute::NoReadThreadID);
+  }
+  void setDoesNotReadThreadID() { addFnAttr(Attribute::NoReadThreadID); }
+
+  /// Return true if the call does not read thread ID or the call doesn't live
+  /// in a presplit coroutine function.
+  ///
+  /// This implies that moving the call within its parent function, or merging
+  /// it with an identical call, can never make it observe a different thread
+  /// ID, so the optimizer may merge such calls if they do not access memory or
+  /// only read memory.
+  ///
+  /// NOTE(review): the name reads as the opposite of what is returned (true
+  /// means moving the call is safe); consider renaming before landing.
+  bool canReadDifferentThreadIDIfMoved() const {
+    if (doesNotReadThreadID())
+      return true;
+    const Function *F = getFunction();
+    return !F || !F->isPresplitCoroutine();
+  }
+
   /// Determine if the call does not access memory.
   bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); }
   void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); }
Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2005,6 +2005,8 @@
     return Attribute::Hot;
   case bitc::ATTR_KIND_PRESPLIT_COROUTINE:
     return Attribute::PresplitCoroutine;
+  case bitc::ATTR_KIND_NO_READ_THREAD_ID:
+    return Attribute::NoReadThreadID;
   }
 }
 
Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -782,6 +782,8 @@
     return bitc::ATTR_KIND_MUSTPROGRESS;
   case Attribute::PresplitCoroutine:
     return bitc::ATTR_KIND_PRESPLIT_COROUTINE;
+  case Attribute::NoReadThreadID:
+    return bitc::ATTR_KIND_NO_READ_THREAD_ID;
   case Attribute::EndAttrKinds:
     llvm_unreachable("Can not encode end-attribute kinds marker.");
   case Attribute::None:
Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -132,7 +132,9 @@
         }
         }
       }
-      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+      return CI->doesNotAccessMemory() &&
+             CI->canReadDifferentThreadIDIfMoved() &&
+             !CI->getType()->isVoidTy();
     }
     return isa<CastInst>(Inst) || isa<UnaryOperator>(Inst) ||
            isa<BinaryOperator>(Inst) || isa<GetElementPtrInst>(Inst) ||
@@ -463,7 +465,8 @@
       return false;
 
     CallInst *CI = dyn_cast<CallInst>(Inst);
-    if (!CI || !CI->onlyReadsMemory())
+    if (!CI || !CI->onlyReadsMemory() ||
+        !CI->canReadDifferentThreadIDIfMoved())
       return false;
     return true;
   }
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -450,12 +450,16 @@
 }
 
 uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
-  if (AA->doesNotAccessMemory(C)) {
+  if (AA->doesNotAccessMemory(C) &&
+      C->canReadDifferentThreadIDIfMoved()) {
     Expression exp = createExpr(C);
     uint32_t e = assignExpNewValueNum(exp).first;
     valueNumbering[C] = e;
     return e;
-  } else if (MD && AA->onlyReadsMemory(C)) {
+  }
+
+  if (MD && AA->onlyReadsMemory(C) &&
+      C->canReadDifferentThreadIDIfMoved()) {
     Expression exp = createExpr(C);
     auto ValNum = assignExpNewValueNum(exp);
     if (ValNum.second) {
@@ -546,10 +550,10 @@
     uint32_t v = lookupOrAdd(cdep);
     valueNumbering[C] = v;
     return v;
-  } else {
-    valueNumbering[C] = nextValueNumber;
-    return nextValueNumber++;
   }
+
+  valueNumbering[C] = nextValueNumber;
+  return nextValueNumber++;
 }
 
 /// Returns true if a value number exists for the specified value.
Index: llvm/lib/Transforms/Scalar/NewGVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1610,17 +1610,27 @@
       return ExprResult::some(createVariableOrConstant(ReturnedValue));
     }
   }
-  if (AA->doesNotAccessMemory(CI)) {
+
+  // Do not value-number calls that could observe a different thread ID when
+  // moved (see CallBase::canReadDifferentThreadIDIfMoved).
+  if (!CI->canReadDifferentThreadIDIfMoved())
+    return ExprResult::none();
+
+  if (AA->doesNotAccessMemory(CI))
     return ExprResult::some(
         createCallExpression(CI, TOPClass->getMemoryLeader()));
-  } else if (AA->onlyReadsMemory(CI)) {
+
+  if (AA->onlyReadsMemory(CI)) {
     if (auto *MA = MSSA->getMemoryAccess(CI)) {
       auto *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(MA);
       return ExprResult::some(createCallExpression(CI, DefiningAccess));
-    } else // MSSA determined that CI does not access memory.
-      return ExprResult::some(
-          createCallExpression(CI, TOPClass->getMemoryLeader()));
+    }
+
+    // MSSA determined that CI does not access memory.
+    return ExprResult::some(
+        createCallExpression(CI, TOPClass->getMemoryLeader()));
   }
+
   return ExprResult::none();
 }
 
Index: llvm/lib/Transforms/Utils/CodeExtractor.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -922,6 +922,7 @@
       case Attribute::WriteOnly:
       case Attribute::AllocKind:
       case Attribute::PresplitCoroutine:
+      case Attribute::NoReadThreadID:
         continue;
       // Those attributes should be safe to propagate to the extracted function.
       case Attribute::AlwaysInline:
Index: llvm/test/Transforms/Coroutines/coro-readnone-01.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-readnone-01.ll
@@ -0,0 +1,89 @@
+; Tests that readnone calls which cross suspend points are not misoptimized.
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK,CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %j = call i32 @readnone_func() readnone
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; Tests that normal functions wouldn't be affected.
+define i1 @normal_function() {
+entry:
+  %i = call i32 @readnone_func() readnone
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  ret i1 true
+
+diff:
+  call void @print_diff()
+  ret i1 false
+}
+
+; CHECK_SPLITTED-LABEL: normal_function(
+; CHECK_SPLITTED-NEXT: entry
+; CHECK_SPLITTED-NEXT: call i32 @readnone_func()
+; CHECK_SPLITTED-NEXT: call void @print_same()
+; CHECK_SPLITTED-NEXT: ret i1 true
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK: br i1 %cmp, label %same, label %diff
+; CHECK-EMPTY:
+; CHECK-NEXT: same:
+; CHECK-NEXT: call void @print_same()
+; CHECK-NEXT: br label
+; CHECK-EMPTY:
+; CHECK-NEXT: diff:
+; CHECK-NEXT: call void @print_diff()
+; CHECK-NEXT: br label
+
+declare i32 @readnone_func() readnone
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
Index: llvm/test/Transforms/Coroutines/coro-readnone-02.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-readnone-02.ll
@@ -0,0 +1,81 @@
+; Tests that readnone calls which don't cross suspend points are optimized as expected after the split.
+;
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,early-cse,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,gvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='coro-split,newgvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone
+  ; noop call to break optimization to combine two consecutive readonly calls.
+  call void @nop()
+  %j = call i32 @readnone_func() readnone
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+;
+; CHECK_SPLITTED-LABEL: f.resume(
+; CHECK_SPLITTED-NEXT: :
+; CHECK_SPLITTED-NEXT: call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]]
+; CHECK_SPLITTED-NEXT: call void @nop()
+; CHECK_SPLITTED-NEXT: call void @print_same()
+;
+; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone }
+;
+; CHECK_UNSPLITTED-LABEL: @f(
+; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: same:
+; CHECK_UNSPLITTED-NEXT: call void @print_same()
+; CHECK_UNSPLITTED-NEXT: br label
+; CHECK_UNSPLITTED-EMPTY:
+; CHECK_UNSPLITTED-NEXT: diff:
+; CHECK_UNSPLITTED-NEXT: call void @print_diff()
+; CHECK_UNSPLITTED-NEXT: br label
+
+declare i32 @readnone_func() readnone
+declare void @nop()
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)
Index: llvm/test/Transforms/Coroutines/coro-readnone-03.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/Coroutines/coro-readnone-03.ll
@@ -0,0 +1,64 @@
+; Tests that readnone and noread_thread_id calls which cross suspend points are optimized as expected.
+; RUN: opt < %s -S -passes='default<O3>' | FileCheck %s --check-prefixes=CHECK-O3
+; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK
+
+define ptr @f() presplitcoroutine {
+entry:
+  %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
+  %size = call i32 @llvm.coro.size.i32()
+  %alloc = call ptr @malloc(i32 %size)
+  %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc)
+  %j = call i32 @readnone_func() readnone noread_thread_id
+  %sus_result = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sus_result, label %suspend [i8 0, label %resume
+                                         i8 1, label %cleanup]
+resume:
+  %i = call i32 @readnone_func() readnone noread_thread_id
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %same, label %diff
+
+same:
+  call void @print_same()
+  br label %cleanup
+
+diff:
+  call void @print_diff()
+  br label %cleanup
+
+cleanup:
+  %mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
+  call void @free(ptr %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(ptr %hdl, i1 0)
+  ret ptr %hdl
+}
+
+; CHECK-O3: define{{.*}}@f.resume(
+; CHECK-O3-NEXT: CoroEnd:
+; CHECK-O3-NEXT: tail call void @print_same()
+; CHECK-O3-NEXT: tail call void @free(ptr nonnull %hdl)
+; CHECK-O3-NEXT: ret void
+
+; CHECK-LABEL: @f(
+; CHECK: resume:
+; CHECK: br i1 true, label %same, label %diff
+
+declare i32 @readnone_func() readnone noread_thread_id
+
+declare void @print_same()
+declare void @print_diff()
+declare ptr @llvm.coro.free(token, ptr)
+declare i32 @llvm.coro.size.i32()
+declare i8 @llvm.coro.suspend(token, i1)
+
+declare token @llvm.coro.id(i32, ptr, ptr, ptr)
+declare i1 @llvm.coro.alloc(token)
+declare ptr @llvm.coro.begin(token, ptr)
+declare i1 @llvm.coro.end(ptr, i1)
+
+declare noalias ptr @malloc(i32)
+declare void @free(ptr)