Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -2129,7 +2129,39 @@ // 'const', 'pure' and 'noalias' attributed functions are also nounwind. if (TargetDecl->hasAttr()) { - FuncAttrs.addAttribute(llvm::Attribute::ReadNone); + if (!LangOpts.Coroutines) + FuncAttrs.addAttribute(llvm::Attribute::ReadNone); + else + // This is a workaround for the bug about pthread_self() in coroutine. + // See https://github.com/llvm/llvm-project/issues/47177 for the + // background. The reason behind the bug is that pthread_self() is + // marked as + // __attribute__((__const__)) which would be converted to `readnone` + // when get lowered to LLVM IR. The readnone function without parameters + // implies that all the calls to the function would return the same + // result. So the compiler would optimize the following code: + // + // auto a = pthread_self(); + // co_await something(); + // auto b = pthread_self(); + // + // to + // + // auto a = pthread_self(); + // co_await something(); + // // replace uses of b with a + // + // The transformation is incorrect in case the coroutine might resume in + // another thread. The key reason for the bug is the abuse of + // __attribute__((__const__)) for thread identification. However, it is + // not easy to fix the problem in the library side (we don't know if + // there are other similar problems in other places) and ask the end + // user to update the library. So we choose to block the optimization + // before we split coroutine by replacing readnone attribute as a + // placeholder so that we could rewrite readnone after we lowered + // coroutines. + FuncAttrs.addAttribute(llvm::Attribute::CoroReadNone); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); // gcc specifies that 'const' functions have greater restrictions than // 'pure' functions, so they also cannot have infinite loops. 
Index: clang/test/CodeGenCoroutines/coro-readnone.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCoroutines/coro-readnone.cpp @@ -0,0 +1,40 @@ +// This tests that calls to a `const` function are not merged across a coroutine suspend point. +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O3 -emit-llvm %s -o - | FileCheck %s + +#include "Inputs/coroutine.h" + +struct awaitable { + bool await_ready() { return false; } + void await_suspend(std::coroutine_handle<> h); + void await_resume() {} +}; +awaitable switch_to_new_thread(); + +struct task { + struct promise_type { + task get_return_object() { return {}; } + std::suspend_never initial_suspend() { return {}; } + std::suspend_never final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception() {} + }; +}; + +__attribute__((const)) int pthread_self_mock(); + +void check(int i, int j); + +task resuming_on_new_thread() { + auto i = pthread_self_mock(); + co_await switch_to_new_thread(); + auto j = pthread_self_mock(); + if (i != j) + check(i, j); +} + +// CHECK-LABEL: define internal fastcc void @_Z22resuming_on_new_threadv.resume +// CHECK: %[[RELOAD:.+]] = load i32, ptr %[[RELOAD_ADDR:.+reload.addr.*]] +// CHECK: %[[CALL:.+]] = tail call noundef i32 @_Z17pthread_self_mockv() #[[CONST_ATTR:.+]] +// CHECK: %[[CMP:.+]] = icmp eq i32 %[[RELOAD]], %[[CALL]] +// CHECK: tail call void @_Z5checkii({{.*}}%[[RELOAD]], {{.*}}%[[CALL]]) +// CHECK: attributes #[[CONST_ATTR]] = {{.*}}coro_readnone Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -1893,6 +1893,13 @@ or has other side-effects, the behavior is undefined. If a function reads from or writes to a readnone pointer argument, the behavior is undefined.
+``coro_readnone`` + On a function, this attribute indicates that the function computes its + result (or decides to unwind an exception) based strictly on its arguments + and the thread identity. The frontend should emit this attribute only if + coroutines are enabled. Otherwise, ``readnone`` should be used. When + coroutines are enabled, ``coro_readnone`` is converted to ``readnone`` + after coroutines get lowered. ``readonly`` On a function, this attribute indicates that the function does not write through any pointer arguments (including ``byval`` arguments) or otherwise Index: llvm/lib/Transforms/Coroutines/CoroEarly.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -241,7 +241,7 @@ return PreservedAnalyses::all(); Lowerer L(M); - for (auto &F : M) { + for (auto &F : M) L.lowerEarlyIntrinsics(F); PreservedAnalyses PA; Index: llvm/test/Transforms/Coroutines/coro-readnone-01.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-readnone-01.ll +++ llvm/test/Transforms/Coroutines/coro-readnone-01.ll @@ -1,6 +1,5 @@ -; Tests that the readnone function attribute could be lowered correctly by -; CoroEarly pass.
-; RUN: opt < %s -S -passes=coro-early -opaque-pointers | FileCheck %s +; Tests that a readnone function called across suspend points isn't misoptimized +; RUN: opt < %s -S -passes='default' -opaque-pointers | FileCheck %s define ptr @f() "coroutine.presplit" { entry: @@ -13,7 +12,7 @@ switch i8 %sus_result, label %suspend [i8 0, label %resume i8 1, label %cleanup] resume: - %i = call i32 @readnone_func() readnone + %i = call i32 @readnone_func() coro_readnone %cmp = icmp eq i32 %i, %j br i1 %cmp, label %same, label %diff @@ -35,12 +34,18 @@ ret i8* %hdl } -; CHECK: %j = call i32 @readnone_func() -; CHECK: %i = call i32 @readnone_func() -; CEHCK: declare i32 @readnone_func() #[[ATTR_NUM:[0-9]+]] -; attributes #[[ATTR_NUM]] = { coro_readnone } - -declare i32 @readnone_func() readnone +; CHECK-LABEL: f.resume( +; CHECK: br i1 %cmp, label %same, label %diff +; CHECK-EMPTY: +; CHECK-NEXT: same: +; CHECK-NEXT: call void @print_same() +; CHECK-NEXT: br label +; CHECK-EMPTY: +; CHECK-NEXT: diff: +; CHECK-NEXT: call void @print_diff() +; CHECK-NEXT: br label + +declare i32 @readnone_func() coro_readnone declare void @print_same() declare void @print_diff() Index: llvm/test/Transforms/Coroutines/coro-readnone-02.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-readnone-02.ll +++ llvm/test/Transforms/Coroutines/coro-readnone-02.ll @@ -40,7 +40,7 @@ ; CHECK: %j = call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]] ; CHECK: %i = call i32 @readnone_func() #[[ATTR_NUM]] ; CEHCK: declare i32 @readnone_func() #[[ATTR_NUM]] -; attributes #[[ATTR_NUM]] = { coro_readnone } +; CHECK: attributes #[[ATTR_NUM]] = { readnone } declare i32 @readnone_func() coro_readnone declare void @print_same() Index: llvm/test/Transforms/Coroutines/coro-readnone-03.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-readnone-03.ll +++ /dev/null @@ -1,62 +0,0 @@ -; Tests that the
readnone function which cross suspend points wouldn't misoptimized -; RUN: opt < %s -S -passes='default' -opaque-pointers | FileCheck %s - -define ptr @f() "coroutine.presplit" { -entry: - %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) - %size = call i32 @llvm.coro.size.i32() - %alloc = call i8* @malloc(i32 %size) - %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) - %j = call i32 @readnone_func() readnone - %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) - switch i8 %sus_result, label %suspend [i8 0, label %resume - i8 1, label %cleanup] -resume: - %i = call i32 @readnone_func() readnone - %cmp = icmp eq i32 %i, %j - br i1 %cmp, label %same, label %diff - -same: - call void @print_same() - br label %cleanup - -diff: - call void @print_diff() - br label %cleanup - -cleanup: - %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) - call void @free(i8* %mem) - br label %suspend - -suspend: - call i1 @llvm.coro.end(i8* %hdl, i1 0) - ret i8* %hdl -} - -; CHECK-LABEL: f.resume( -; CHECK: br i1 %cmp, label %same, label %diff -; CHECK-EMPTY: -; CHECK-NEXT: same: -; CHECK-NEXT: call void @print_same() -; CHECK-NEXT: br label -; CHECK-EMPTY: -; CHECK-NEXT: diff: -; CHECK-NEXT: call void @print_diff() -; CHECK-NEXT: br label - -declare i32 @readnone_func() readnone - -declare void @print_same() -declare void @print_diff() -declare i8* @llvm.coro.free(token, i8*) -declare i32 @llvm.coro.size.i32() -declare i8 @llvm.coro.suspend(token, i1) - -declare token @llvm.coro.id(i32, i8*, i8*, i8*) -declare i1 @llvm.coro.alloc(token) -declare i8* @llvm.coro.begin(token, i8*) -declare i1 @llvm.coro.end(i8*, i1) - -declare noalias i8* @malloc(i32) -declare void @free(i8*)