diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1644,15 +1644,21 @@
 
   // The inliner does not know how to inline through calls with operand bundles
   // in general ...
+  Value *ConvergenceControlToken = nullptr;
   if (CB.hasOperandBundles()) {
     for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {
-      uint32_t Tag = CB.getOperandBundleAt(i).getTagID();
-      // ... but it knows how to inline through "deopt" operand bundles ...
-      if (Tag == LLVMContext::OB_deopt)
+      OperandBundleUse OBU = CB.getOperandBundleAt(i);
+      uint32_t Tag = OBU.getTagID();
+      // ... but it knows how to inline through some operand bundles.
+      if (Tag == LLVMContext::OB_deopt || Tag == LLVMContext::OB_funclet)
         continue;
-      // ... and "funclet" operand bundles.
-      if (Tag == LLVMContext::OB_funclet)
+
+      // Remember the call site's convergence control token; it later replaces
+      // the convergence entry intrinsic found in the inlined entry block.
+      if (Tag == LLVMContext::OB_convergencectrl) {
+        ConvergenceControlToken = OBU.Inputs[0].get();
         continue;
+      }
 
       return InlineResult::failure("unsupported operand bundle");
     }
@@ -1909,13 +1915,32 @@
   }
 
   // If there are any alloca instructions in the block that used to be the entry
-  // block for the callee, move them to the entry block of the caller. First
-  // calculate which instruction they should be inserted before. We insert the
-  // instructions at the end of the current alloca list.
+  // block for the callee, move them to the entry block of the caller.
+  //
+  // Also handle convergence control entry intrinsics: if the call site carried
+  // a "convergencectrl" bundle, uses of llvm.experimental.convergence.entry in
+  // that block are redirected to the call site's token and the intrinsic call
+  // is erased.
+  // NOTE(review): without a call-site token the entry intrinsic is left as is,
+  // possibly outside the caller's entry block -- confirm this is valid IR.
   {
     BasicBlock::iterator InsertPoint = Caller->begin()->begin();
     for (BasicBlock::iterator I = FirstNewBlock->begin(),
          E = FirstNewBlock->end(); I != E; ) {
+      if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {
+        // Step past the call first so erasing it cannot invalidate I.
+        ++I;
+
+        if (ConvergenceControlToken &&
+            IntrinsicCall->getIntrinsicID() ==
+                Intrinsic::experimental_convergence_entry) {
+          IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken);
+          IntrinsicCall->eraseFromParent();
+        }
+
+        continue;
+      }
+
       AllocaInst *AI = dyn_cast<AllocaInst>(I++);
       if (!AI) continue;
 
diff --git a/llvm/test/Transforms/Inline/inline-convergent.ll b/llvm/test/Transforms/Inline/inline-convergent.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-convergent.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -inline -S %s | FileCheck %s
+; RUN: opt -passes='cgscc(inline)' -S %s | FileCheck %s
+
+define void @nonconvergent_callee() alwaysinline {
+; CHECK-LABEL: @nonconvergent_callee(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOKEN:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT:    call void @f(i32 0) [ "convergencectrl"(token [[TOKEN]]) ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %token = call token @llvm.experimental.convergence.anchor()
+  call void @f(i32 0) [ "convergencectrl"(token %token) ]
+  ret void
+}
+
+define void @convergent_callee(i32 %v) convergent alwaysinline {
+; CHECK-LABEL: @convergent_callee(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    call void @f(i32 [[V:%.*]]) [ "convergencectrl"(token [[TOKEN]]) ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %token = call token @llvm.experimental.convergence.entry()
+  call void @f(i32 %v) [ "convergencectrl"(token %token) ]
+  ret void
+}
+
+define void @test_nonconvergent() {
+; CHECK-LABEL: @test_nonconvergent(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOKEN_I:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT:    call void @f(i32 0) [ "convergencectrl"(token [[TOKEN_I]]) ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @nonconvergent_callee()
+  ret void
+}
+
+define void @test_convergent_basic() {
+; CHECK-LABEL: @test_convergent_basic(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOKEN:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT:    br i1 undef, label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    call void @f(i32 0) [ "convergencectrl"(token [[TOKEN]]) ]
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %token = call token @llvm.experimental.convergence.anchor()
+  br i1 undef, label %then, label %end
+
+then:
+  call void @convergent_callee(i32 0) [ "convergencectrl"(token %token) ]
+  br label %end
+
+end:
+  ret void
+}
+
+define void @test_convergent_multiple() convergent {
+; CHECK-LABEL: @test_convergent_multiple(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    call void @f(i32 0) [ "convergencectrl"(token [[TOKEN]]) ]
+; CHECK-NEXT:    call void @f(i32 1) [ "convergencectrl"(token [[TOKEN]]) ]
+; CHECK-NEXT:    call void @f(i32 2) [ "convergencectrl"(token [[TOKEN]]) ]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %token = call token @llvm.experimental.convergence.entry()
+  call void @convergent_callee(i32 0) [ "convergencectrl"(token %token) ]
+  call void @convergent_callee(i32 1) [ "convergencectrl"(token %token) ]
+  call void @convergent_callee(i32 2) [ "convergencectrl"(token %token) ]
+  ret void
+}
+
+define void @test_convergent_loop() {
+; CHECK-LABEL: @test_convergent_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOKEN:%.*]] = call token @llvm.experimental.convergence.anchor()
+; CHECK-NEXT:    br i1 undef, label [[HDR:%.*]], label [[END:%.*]]
+; CHECK:       hdr:
+; CHECK-NEXT:    [[TOK_LOOP:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TOKEN]]) ]
+; CHECK-NEXT:    call void @f(i32 0) [ "convergencectrl"(token [[TOK_LOOP]]) ]
+; CHECK-NEXT:    br i1 undef, label [[HDR]], label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %token = call token @llvm.experimental.convergence.anchor()
+  br i1 undef, label %hdr, label %end
+
+hdr:
+  %tok.loop = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %token) ]
+  call void @convergent_callee(i32 0) [ "convergencectrl"(token %tok.loop) ]
+  br i1 undef, label %hdr, label %end
+
+end:
+  ret void
+}
+
+declare void @f(i32) convergent
+
+declare token @llvm.experimental.convergence.entry()
+declare token @llvm.experimental.convergence.anchor()
+declare token @llvm.experimental.convergence.loop()