Index: llvm/include/llvm/AsmParser/LLToken.h =================================================================== --- llvm/include/llvm/AsmParser/LLToken.h +++ llvm/include/llvm/AsmParser/LLToken.h @@ -189,6 +189,7 @@ kw_inalloca, kw_cold, kw_convergent, + kw_coro_readnone, kw_dereferenceable, kw_dereferenceable_or_null, kw_disable_sanitizer_instrumentation, Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -684,6 +684,7 @@ ATTR_KIND_NO_SANITIZE_BOUNDS = 79, ATTR_KIND_ALLOC_ALIGN = 80, ATTR_KIND_ALLOCATED_POINTER = 81, + ATTR_KIND_CORO_READ_NONE = 82, }; enum ComdatSelectionKindCodes { Index: llvm/include/llvm/IR/Attributes.td =================================================================== --- llvm/include/llvm/IR/Attributes.td +++ llvm/include/llvm/IR/Attributes.td @@ -206,6 +206,11 @@ /// Function does not access memory. def ReadNone : EnumAttr<"readnone", [FnAttr, ParamAttr]>; +/// Helper attribute to block optimizations for ReadNone attribute +/// during the lowering of coroutines to avoid misoptimizations. +/// This should be used internally only. +def CoroReadNone : EnumAttr<"coro_readnone", [FnAttr]>; + /// Function only reads from memory. def ReadOnly : EnumAttr<"readonly", [FnAttr, ParamAttr]>; Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -642,6 +642,7 @@ KEYWORD(inalloca); KEYWORD(cold); KEYWORD(convergent); + KEYWORD(coro_readnone); KEYWORD(dereferenceable); KEYWORD(dereferenceable_or_null); KEYWORD(disable_sanitizer_instrumentation); Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1486,6 +1486,8 @@ return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; + case bitc::ATTR_KIND_CORO_READ_NONE: + return Attribute::CoroReadNone; case bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION: return Attribute::DisableSanitizerInstrumentation; case bitc::ATTR_KIND_ELEMENTTYPE: Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -629,6 +629,8 @@ return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; + case Attribute::CoroReadNone: + return bitc::ATTR_KIND_CORO_READ_NONE; case Attribute::DisableSanitizerInstrumentation: return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION; case Attribute::Hot: Index: llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp =================================================================== --- llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp +++ llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp @@ -623,6 +623,8 @@ return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; + case Attribute::CoroReadNone: + return bitc::ATTR_KIND_CORO_READ_NONE; case Attribute::DisableSanitizerInstrumentation: return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION; case Attribute::Hot: Index: llvm/lib/Transforms/Coroutines/CoroCleanup.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -117,6 +117,16 @@ } } + // Convert CoroReadNone markers to normal ReadNone attribute to enable + // further optimizations. + if (F.hasFnAttribute(Attribute::CoroReadNone)) { + F.removeFnAttr(Attribute::CoroReadNone); + F.addFnAttr(Attribute::ReadNone); + for (auto *U : F.users()) + if (auto *CB = dyn_cast(U)) + CB->addFnAttr(Attribute::ReadNone); + } + // After replacement were made we can cleanup the function body a little. simplifyCFG(F); } Index: llvm/lib/Transforms/Coroutines/CoroEarly.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -241,9 +241,47 @@ return PreservedAnalyses::all(); Lowerer L(M); - for (auto &F : M) + for (auto &F : M) { L.lowerEarlyIntrinsics(F); + // This is a workaround for the bug about pthread_self() in coroutine. + // See https://github.com/llvm/llvm-project/issues/47177 for the background. + // The reason behind the bug is that pthread_self() is marked as + // __attribute__((__const__)) which would be converted to `readnone` when + // get lowered to LLVM IR. The readnone function without parameters implies + // that all the calls to the function would return the same result. So the + // compiler would optimize the following code: + // + // auto a = pthread_self(); + // co_await something(); + // auto b = pthread_self(); + // + // to + // + // auto a = pthread_self(); + // co_await something(); + // // replace uses of b with a + // + // The transformation is incorrect in case the coroutine might resume in + // another thread. The key reason for the bug is the abuse of + // __attribute__((__const__)) for thread identification. However, it is not + // easy to fix the problem in the library side (we don't know if there are + // other similar problems in other places) and ask the end user to update + // the library. So we choose to block the optimization before we split + // coroutine by replacing readnone attribute as a placeholder so that we + // could rewrite readnone later. Here we filted LLVM intrinsics since we + // could fix LLVM intrinsics easily in case there are similar problems. + if (F.hasFnAttribute(Attribute::ReadNone) && !F.isIntrinsic()) { + F.removeFnAttr(Attribute::ReadNone); + // We don't need to add coro_readnone attribute for calls since + // coro_readnone wouldn't do anything special. + for (auto *U : F.users()) + if (auto *CB = dyn_cast(U)) + CB->removeFnAttr(Attribute::ReadNone); + F.addFnAttr(Attribute::CoroReadNone); + } + } + PreservedAnalyses PA; PA.preserveSet(); return PA; Index: llvm/test/Transforms/Coroutines/coro-readnone-01.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-readnone-01.ll @@ -0,0 +1,57 @@ +; Tests that the readnone function attribute could be lowered correctly by +; CoroEarly pass. +; RUN: opt < %s -S -passes=coro-early -opaque-pointers | FileCheck %s + +define ptr @f() "coroutine.presplit" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %j = call i32 @readnone_func() readnone + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %i = call i32 @readnone_func() readnone + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; CHECK: %j = call i32 @readnone_func() +; CHECK: %i = call i32 @readnone_func() +; CEHCK: declare i32 @readnone_func() #[[ATTR_NUM:[0-9]+]] +; attributes #[[ATTR_NUM]] = { coro_readnone } + +declare i32 @readnone_func() readnone + +declare void @print_same() +declare void @print_diff() +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*) Index: llvm/test/Transforms/Coroutines/coro-readnone-02.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-readnone-02.ll @@ -0,0 +1,57 @@ +; Tests that the coro_readnone function attribute could be lowered correctly by +; CoroCleanup pass. +; RUN: opt < %s -S -passes=coro-cleanup -opaque-pointers | FileCheck %s +define ptr @f() #0 { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr @f, ptr null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call ptr @malloc(i32 %size) + %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc) #5 + %j = call i32 @readnone_func() + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [ + i8 0, label %resume + i8 1, label %cleanup + ] + +resume: ; preds = %entry + %i = call i32 @readnone_func() + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %same, label %diff + +same: ; preds = %resume + call void @print_same() + br label %cleanup + +diff: ; preds = %resume + call void @print_diff() + br label %cleanup + +cleanup: ; preds = %diff, %same, %entry + %mem = call ptr @llvm.coro.free(token %id, ptr %hdl) + call void @free(ptr %mem) + br label %suspend + +suspend: ; preds = %cleanup, %entry + %0 = call i1 @llvm.coro.end(ptr %hdl, i1 false) #5 + ret ptr %hdl +} + +; CHECK: %j = call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]] +; CHECK: %i = call i32 @readnone_func() #[[ATTR_NUM]] +; CEHCK: declare i32 @readnone_func() #[[ATTR_NUM]] +; attributes #[[ATTR_NUM]] = { coro_readnone } + +declare i32 @readnone_func() coro_readnone +declare void @print_same() +declare void @print_diff() + +declare ptr @llvm.coro.free(token, ptr nocapture readonly) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) +declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) +declare i1 @llvm.coro.alloc(token) +declare ptr @llvm.coro.begin(token, ptr writeonly) +declare i1 @llvm.coro.end(ptr, i1) +declare noalias ptr @malloc(i32) +declare void @free(ptr) Index: llvm/test/Transforms/Coroutines/coro-readnone-03.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-readnone-03.ll @@ -0,0 +1,62 @@ +; Tests that the readnone function which cross suspend points wouldn't misoptimized +; RUN: opt < %s -S -passes='default' -opaque-pointers | FileCheck %s + +define ptr @f() "coroutine.presplit" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %j = call i32 @readnone_func() readnone + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %i = call i32 @readnone_func() readnone + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; CHECK-LABEL: f.resume( +; CHECK: br i1 %cmp, label %same, label %diff +; CHECK-EMPTY: +; CHECK-NEXT: same: +; CHECK-NEXT: call void @print_same() +; CHECK-NEXT: br label +; CHECK-EMPTY: +; CHECK-NEXT: diff: +; CHECK-NEXT: call void @print_diff() +; CHECK-NEXT: br label + +declare i32 @readnone_func() readnone + +declare void @print_same() +declare void @print_diff() +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*)