Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -42,6 +42,11 @@ functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection. +* ``readnone`` calls that cross suspend points in coroutines will not be + merged, since ``readnone`` calls may access the thread id and the thread id + is not a constant in coroutines. This decision may cause unnecessary + performance regressions and we plan to fix it in later versions. + * ... Update on required toolchains to build LLVM Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -132,7 +132,15 @@ } } } - return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy(); + return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy() && + // FIXME: Currently the calls which may access the thread id may + // be considered as not accessing the memory. But this is + // problematic for coroutines, since coroutines may resume in a + // different thread. So we disable the optimization here for the + // correctness. However, it may block many other correct + // optimizations. Revert this one when we detect the memory + // accessing kind more precisely. + !CI->getFunction()->isPresplitCoroutine(); } return isa(Inst) || isa(Inst) || isa(Inst) || isa(Inst) || @@ -463,7 +471,15 @@ return false; CallInst *CI = dyn_cast(Inst); - if (!CI || !CI->onlyReadsMemory()) + if (!CI || !CI->onlyReadsMemory() || + // FIXME: Currently the calls which may access the thread id may + // be considered as not accessing the memory. But this is + // problematic for coroutines, since coroutines may resume in a + // different thread. So we disable the optimization here for the + // correctness. However, it may block many other correct + // optimizations. 
Revert this one when we detect the memory + // accessing kind more precisely. + CI->getFunction()->isPresplitCoroutine()) return false; return true; } Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -450,12 +450,28 @@ } uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) { - if (AA->doesNotAccessMemory(C)) { + if (AA->doesNotAccessMemory(C) && + // FIXME: Currently the calls which may access the thread id may + // be considered as not accessing the memory. But this is + // problematic for coroutines, since coroutines may resume in a + // different thread. So we disable the optimization here for the + // correctness. However, it may block many other correct + // optimizations. Revert this one when we detect the memory + // accessing kind more precisely. + !C->getFunction()->isPresplitCoroutine()) { Expression exp = createExpr(C); uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; - } else if (MD && AA->onlyReadsMemory(C)) { + } else if (MD && AA->onlyReadsMemory(C) && + // FIXME: Currently the calls which may access the thread id may + // be considered as not accessing the memory. But this is + // problematic for coroutines, since coroutines may resume in a + // different thread. So we disable the optimization here for the + // correctness. However, it may block many other correct + // optimizations. Revert this one when we detect the memory + // accessing kind more precisely. 
+ !C->getFunction()->isPresplitCoroutine()) { Expression exp = createExpr(C); auto ValNum = assignExpNewValueNum(exp); if (ValNum.second) { Index: llvm/lib/Transforms/Scalar/NewGVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/NewGVN.cpp +++ llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1607,6 +1607,17 @@ return ExprResult::some(createVariableOrConstant(ReturnedValue)); } } + + // FIXME: Currently the calls which may access the thread id may + // be considered as not accessing the memory. But this is + // problematic for coroutines, since coroutines may resume in a + // different thread. So we disable the optimization here for the + // correctness. However, it may block many other correct + // optimizations. Revert this one when we detect the memory + // accessing kind more precisely. + if (CI->getFunction()->isPresplitCoroutine()) + return ExprResult::none(); + if (AA->doesNotAccessMemory(CI)) { return ExprResult::some( createCallExpression(CI, TOPClass->getMemoryLeader())); Index: llvm/test/Transforms/Coroutines/coro-readnone-02.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-readnone-02.ll @@ -0,0 +1,81 @@ +; Tests that readnone calls which don't cross suspend points are optimized as expected after the split. 
+; +; RUN: opt < %s -S -passes='default' | FileCheck %s --check-prefixes=CHECK_SPLITTED +; RUN: opt < %s -S -passes='coro-split,early-cse,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED +; RUN: opt < %s -S -passes='coro-split,gvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED +; RUN: opt < %s -S -passes='coro-split,newgvn,simplifycfg' | FileCheck %s --check-prefixes=CHECK_SPLITTED +; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED +; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED +; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK_UNSPLITTED + +define ptr @f() presplitcoroutine { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call ptr @malloc(i32 %size) + %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc) + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %i = call i32 @readnone_func() readnone + ; noop call to break optimization to combine two consecutive readonly calls. 
+ call void @nop() + %j = call i32 @readnone_func() readnone + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call ptr @llvm.coro.free(token %id, ptr %hdl) + call void @free(ptr %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(ptr %hdl, i1 0) + ret ptr %hdl +} + +; +; CHECK_SPLITTED-LABEL: f.resume( +; CHECK_SPLITTED-NEXT: : +; CHECK_SPLITTED-NEXT: call i32 @readnone_func() #[[ATTR_NUM:[0-9]+]] +; CHECK_SPLITTED-NEXT: call void @nop() +; CHECK_SPLITTED-NEXT: call void @print_same() +; +; CHECK_SPLITTED: attributes #[[ATTR_NUM]] = { readnone } +; +; CHECK_UNSPLITTED-LABEL: @f( +; CHECK_UNSPLITTED: br i1 %cmp, label %same, label %diff +; CHECK_UNSPLITTED-EMPTY: +; CHECK_UNSPLITTED-NEXT: same: +; CHECK_UNSPLITTED-NEXT: call void @print_same() +; CHECK_UNSPLITTED-NEXT: br label +; CHECK_UNSPLITTED-EMPTY: +; CHECK_UNSPLITTED-NEXT: diff: +; CHECK_UNSPLITTED-NEXT: call void @print_diff() +; CHECK_UNSPLITTED-NEXT: br label + +declare i32 @readnone_func() readnone +declare void @nop() + +declare void @print_same() +declare void @print_diff() +declare ptr @llvm.coro.free(token, ptr) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, ptr, ptr, ptr) +declare i1 @llvm.coro.alloc(token) +declare ptr @llvm.coro.begin(token, ptr) +declare i1 @llvm.coro.end(ptr, i1) + +declare noalias ptr @malloc(i32) +declare void @free(ptr) Index: llvm/test/Transforms/Coroutines/coro-readnone.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Coroutines/coro-readnone.ll @@ -0,0 +1,89 @@ +; Tests that readnone calls which cross suspend points aren't misoptimized. 
+; RUN: opt < %s -S -passes='default' | FileCheck %s --check-prefixes=CHECK,CHECK_SPLITTED +; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED +; RUN: opt < %s -S -passes='gvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED +; RUN: opt < %s -S -passes='newgvn' | FileCheck %s --check-prefixes=CHECK,CHECK_UNSPLITTED + +define ptr @f() presplitcoroutine { +entry: + %id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null) + %size = call i32 @llvm.coro.size.i32() + %alloc = call ptr @malloc(i32 %size) + %hdl = call ptr @llvm.coro.begin(token %id, ptr %alloc) + %j = call i32 @readnone_func() readnone + %sus_result = call i8 @llvm.coro.suspend(token none, i1 false) + switch i8 %sus_result, label %suspend [i8 0, label %resume + i8 1, label %cleanup] +resume: + %i = call i32 @readnone_func() readnone + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + br label %cleanup + +diff: + call void @print_diff() + br label %cleanup + +cleanup: + %mem = call ptr @llvm.coro.free(token %id, ptr %hdl) + call void @free(ptr %mem) + br label %suspend + +suspend: + call i1 @llvm.coro.end(ptr %hdl, i1 0) + ret ptr %hdl +} + +; Tests that normal functions wouldn't be affected. 
+define i1 @normal_function() { +entry: + %i = call i32 @readnone_func() readnone + %j = call i32 @readnone_func() readnone + %cmp = icmp eq i32 %i, %j + br i1 %cmp, label %same, label %diff + +same: + call void @print_same() + ret i1 true + +diff: + call void @print_diff() + ret i1 false +} + +; CHECK_SPLITTED-LABEL: normal_function( +; CHECK_SPLITTED-NEXT: entry +; CHECK_SPLITTED-NEXT: call i32 @readnone_func() +; CHECK_SPLITTED-NEXT: call void @print_same() +; CHECK_SPLITTED-NEXT: ret i1 true +; +; CHECK_SPLITTED-LABEL: f.resume( +; CHECK_UNSPLITTED-LABEL: @f( +; CHECK: br i1 %cmp, label %same, label %diff +; CHECK-EMPTY: +; CHECK-NEXT: same: +; CHECK-NEXT: call void @print_same() +; CHECK-NEXT: br label +; CHECK-EMPTY: +; CHECK-NEXT: diff: +; CHECK-NEXT: call void @print_diff() +; CHECK-NEXT: br label + +declare i32 @readnone_func() readnone + +declare void @print_same() +declare void @print_diff() +declare ptr @llvm.coro.free(token, ptr) +declare i32 @llvm.coro.size.i32() +declare i8 @llvm.coro.suspend(token, i1) + +declare token @llvm.coro.id(i32, ptr, ptr, ptr) +declare i1 @llvm.coro.alloc(token) +declare ptr @llvm.coro.begin(token, ptr) +declare i1 @llvm.coro.end(ptr, i1) + +declare noalias ptr @malloc(i32) +declare void @free(ptr)