Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -1316,6 +1316,10 @@ ReadOnly>, NoCapture>]>; +// Coroutine Lowering Intrinsics to block optimizations. Used internally by +// coroutine passes. +def int_coro_maychange : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty]>; + ///===-------------------------- Other Intrinsics --------------------------===// // def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>, Index: llvm/lib/Transforms/Coroutines/CoroCleanup.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -93,6 +93,9 @@ } else continue; break; + case Intrinsic::coro_maychange: + II->replaceAllUsesWith(II->getOperand(0)); + break; case Intrinsic::coro_async_size_replace: auto *Target = cast( cast(II->getArgOperand(0)->stripPointerCasts()) @@ -111,6 +114,7 @@ Target->replaceAllUsesWith(NewFuncPtrStruct); break; } + II->eraseFromParent(); Changed = true; } @@ -129,7 +133,7 @@ M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr", "llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon", "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace", - "llvm.coro.async.resume"}); + "llvm.coro.async.resume", "llvm.coro.maychange"}); } PreservedAnalyses CoroCleanupPass::run(Function &F, Index: llvm/lib/Transforms/Coroutines/CoroEarly.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -244,6 +244,27 @@ for (auto &F : M) L.lowerEarlyIntrinsics(F); + // Add llvm.coro.maychange to TLS variable to avoid misoptimization. + // + // Previously, the optimizer could assume the address of a TLS variable is + // same in the same function. 
The assumption is broken now after we introduced + // coroutines. + // + // Note that we couldn't mark the use in coroutines only due to potential + // inlining. It is too expensive to do CFG analysis here to mark the use for + // reachable functions only. And the TLS variable should be able to be optimized + // finally after we removed llvm.coro.maychange intrinsic in CoroCleanup. + auto *MaychangeFn = Intrinsic::getDeclaration(&M, Intrinsic::coro_maychange); + for (auto &GV : M.getGlobalList()) + if (GV.isThreadLocal()) + for (auto &Use : llvm::make_early_inc_range(GV.uses())) { + auto *UserInst = dyn_cast<Instruction>(Use.getUser()); + if (!UserInst) + continue; + + Use.set(CallInst::Create(MaychangeFn, {&GV}, GV.getName(), UserInst)); + } + PreservedAnalyses PA; PA.preserveSet<CFGAnalyses>(); return PA; Index: llvm/lib/Transforms/Coroutines/Coroutines.cpp =================================================================== --- llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -88,6 +88,7 @@ "llvm.coro.id.async", "llvm.coro.id.retcon", "llvm.coro.id.retcon.once", + "llvm.coro.maychange", "llvm.coro.noop", "llvm.coro.prepare.async", "llvm.coro.prepare.retcon", Index: llvm/test/Transforms/Coroutines/coro-TLS-01.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-TLS-01.ll @@ -0,0 +1,66 @@ +; Tests that the TLS variables which cross suspend points wouldn't be misoptimized. 
+; RUN: opt < %s -S -passes=coro-early,sroa,early-cse,coro-split,coro-cleanup,simplifycfg -opaque-pointers | FileCheck %s + +@tls_variable = thread_local global i32 0 + +define ptr @f() "coroutine.presplit"="0" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %i = alloca ptr + %j = alloca ptr + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + store ptr @tls_variable, ptr %i + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + store ptr @tls_variable, ptr %j + %i_value = load ptr, ptr %i + %j_value = load ptr, ptr %j + %cmp = icmp eq ptr %i_value, %j_value + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; CHECK-LABEL: f.resume( +; CHECK: br i1 %cmp, label %same, label %diff +; CHECK-EMPTY: +; CHECK-NEXT: same: +; CHECK-NEXT: call void @print_same() +; CHECK-NEXT: br label %cleanup +; CHECK-EMPTY: +; CHECK-NEXT: diff: +; CHECK-NEXT: call void @print_diff() +; CHECK-NEXT: br label %cleanup + +declare void @print_same() +declare void @print_diff() +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*) Index: llvm/test/Transforms/Coroutines/coro-TLS-02.ll =================================================================== --- /dev/null +++ 
llvm/test/Transforms/Coroutines/coro-TLS-02.ll @@ -0,0 +1,61 @@ +; Tests that the TLS variables which don't cross suspend points would be optimized correctly. +; RUN: opt < %s -S -passes=coro-early,coro-split,coro-cleanup,sroa,early-cse,simplifycfg -opaque-pointers | FileCheck %s + +@tls_variable = thread_local global i32 0 + +define ptr @f() "coroutine.presplit"="0" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %i = alloca ptr + %j = alloca ptr + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + store ptr @tls_variable, ptr %i + store ptr @tls_variable, ptr %j + %i_value = load ptr, ptr %i + %j_value = load ptr, ptr %j + %cmp = icmp eq ptr %i_value, %j_value + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; CHECK: void @f.resume +; CHECK-NEXT: entry.resume: +; CHECK-NEXT: call void @print_same() +; CHECK-NEXT: call void @free(ptr %hdl) +; CHECK-NEXT: ret void + +declare void @print_same() +declare void @print_diff() +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*) Index: llvm/test/Transforms/Coroutines/coro-TLS-03.ll =================================================================== --- /dev/null +++ 
llvm/test/Transforms/Coroutines/coro-TLS-03.ll @@ -0,0 +1,66 @@ +; Tests that the TLS variables which cross suspend points wouldn't be misoptimized during O2 pipeline. +; RUN: opt < %s -S -passes='default<O2>' -opaque-pointers | FileCheck %s + +@tls_variable = thread_local global i32 0 + +define ptr @f() "coroutine.presplit"="0" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %i = alloca ptr + %j = alloca ptr + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + store ptr @tls_variable, ptr %i + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + store ptr @tls_variable, ptr %j + %i_value = load ptr, ptr %i + %j_value = load ptr, ptr %j + %cmp = icmp eq ptr %i_value, %j_value + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; CHECK-LABEL: f.resume( +; CHECK: br i1 %cmp, label %same, label %diff +; CHECK-EMPTY: +; CHECK-NEXT: same: +; CHECK-NEXT: call void @print_same() +; CHECK-NEXT: br label +; CHECK-EMPTY: +; CHECK-NEXT: diff: +; CHECK-NEXT: call void @print_diff() +; CHECK-NEXT: br label + +declare void @print_same() +declare void @print_diff() +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*) Index: llvm/test/Transforms/Coroutines/coro-TLS-04.ll 
=================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-TLS-04.ll @@ -0,0 +1,61 @@ +; Tests that the TLS variables which don't cross suspend points would be optimized correctly during O2 pipelines. +; RUN: opt < %s -S -passes='default<O2>' -opaque-pointers | FileCheck %s + +@tls_variable = thread_local global i32 0 + +define ptr @f() "coroutine.presplit"="0" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %i = alloca ptr + %j = alloca ptr + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + store ptr @tls_variable, ptr %i + store ptr @tls_variable, ptr %j + %i_value = load ptr, ptr %i + %j_value = load ptr, ptr %j + %cmp = icmp eq ptr %i_value, %j_value + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +; CHECK: void @f.resume +; CHECK-NEXT: resume: +; CHECK-NEXT: call void @print_same( +; CHECK-NEXT: call void @free( +; CHECK-NEXT: ret void + +declare void @print_same() +declare void @print_diff() +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*) Index: llvm/test/Transforms/Coroutines/coro-cleanup-maychange.ll 
=================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-cleanup-maychange.ll @@ -0,0 +1,73 @@ +; Test that coro-cleanup would convert llvm.coro.maychange intrinsics +; correctly. +; RUN: opt < %s -S -passes=coro-cleanup -opaque-pointers | FileCheck %s + +%f.Frame = type { ptr, ptr, ptr, ptr, i1 } + +@tls_variable = thread_local global i32 0 +@f.resumers = private constant [3 x ptr] [ptr @f.resume, ptr @f.destroy, ptr @f.cleanup] + +define ptr @f() { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr @f, ptr @f.resumers) + %alloc = call ptr @malloc(i32 40) + %i = alloca ptr, align 8 + %j = alloca ptr, align 8 + %hdl = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc) + %resume.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 0 + store ptr @f.resume, ptr %resume.addr, align 8 + %destroy.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 1 + store ptr @f.destroy, ptr %destroy.addr, align 8 + %i.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2 + %j.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 3 + ; CHECK: store ptr @tls_variable, ptr %i.reload.addr, align 8 + %tls_variable1 = call ptr @llvm.coro.maychange(ptr @tls_variable) + store ptr %tls_variable1, ptr %i.reload.addr, align 8 + %index.addr2 = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 4 + store i1 false, ptr %index.addr2, align 1 + ret ptr %hdl +} + +define internal fastcc void @f.resume(ptr noalias nonnull align 8 dereferenceable(40) %hdl) { +entry.resume: + %i.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2 + %j.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 3 + %tls_variable = call ptr @llvm.coro.maychange(ptr @tls_variable) + ; CHECK: store ptr @tls_variable, ptr %j.reload.addr, align 8 + store ptr %tls_variable, ptr %j.reload.addr, align 8 + call void @consume(ptr %i.reload.addr) + call void 
@consume(ptr %j.reload.addr) + call void @free(ptr %hdl) + ret void +} + +define internal fastcc void @f.destroy(ptr noalias nonnull align 8 dereferenceable(40) %hdl) { +entry.destroy: + %i.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2 + %j.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 3 + call void @free(ptr %hdl) + ret void +} + +define internal fastcc void @f.cleanup(ptr noalias nonnull align 8 dereferenceable(40) %hdl) { +entry.cleanup: + %i.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 2 + %j.reload.addr = getelementptr inbounds %f.Frame, ptr %hdl, i32 0, i32 3 + call void @free(ptr null) + ret void +} + +declare void @consume(ptr) +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*) + +declare ptr @llvm.coro.maychange(ptr) Index: llvm/test/Transforms/Coroutines/coro-early-maychange.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-early-maychange.ll @@ -0,0 +1,50 @@ +; Tests that coro-early would generate llvm.coro.maychange intrinsics for TLS +; variable correctly. 
+; RUN: opt < %s -S -passes=coro-early -opaque-pointers | FileCheck %s + +@tls_variable = thread_local global i32 0 + +define i8* @f() "coroutine.presplit"="0" { +entry: + %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call i8* @malloc(i32 %size) + %i = alloca ptr + %j = alloca ptr + %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc) + ; CHECK: %[[TLS_VARIABLE:.*]] = call ptr @llvm.coro.maychange(ptr @tls_variable) + ; CHECK-NEXT: store ptr %[[TLS_VARIABLE]], ptr %i + store ptr @tls_variable, ptr %i + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + ; CHECK: %[[TLS_VARIABLE2:.*]] = call ptr @llvm.coro.maychange(ptr @tls_variable) + ; CHECK-NEXT: store ptr %[[TLS_VARIABLE2]], ptr %j + store ptr @tls_variable, ptr %j + call void @consume(ptr %i) + call void @consume(ptr %j) + br label %cleanup + +cleanup: + %mem = call i8* @llvm.coro.free(token %id, i8* %hdl) + call void @free(i8* %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(i8* %hdl, i1 0) + ret i8* %hdl +} + +declare void @consume(ptr) +declare i8* @llvm.coro.free(token, i8*) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, i8*, i8*, i8*) +declare i1 @llvm.coro.alloc(token) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.end(i8*, i1) + +declare noalias i8* @malloc(i32) +declare void @free(i8*)