diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -495,7 +495,7 @@
 // Returns the location of the Swift asynchronous context (usually stored just
 // before the frame pointer), and triggers the creation of a null context if it
 // would otherwise be unneeded.
-def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], [IntrNoMem]>;
+def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], []>;
 
 //===--------------------- Code Generator Intrinsics ----------------------===//
 //
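Note: dropping IntrNoMem is what carries the fix at the IR level. Without the attribute the intrinsic is no longer modeled as readnone, so passes such as EarlyCSE and GVN stop treating two calls as interchangeable, which matters because coroutine splitting later places the calls in different frames (see the test below). A minimal illustration of the attribute's effect, not part of the patch, with a made-up function name:

; With [IntrNoMem] on the intrinsic, EarlyCSE would rewrite %b to %a here;
; with the empty attribute list the call may write memory, so both survive.
; Try: opt -passes=early-cse -S on this file.
declare i8** @llvm.swift.async.context.addr()

define void @no_cse_illustration(i8* %v) {
entry:
  %a = call i8** @llvm.swift.async.context.addr()
  store i8* null, i8** %a, align 8
  %b = call i8** @llvm.swift.async.context.addr()
  store i8* %v, i8** %b, align 8
  ret void
}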
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -4074,6 +4074,24 @@
       }
       break;
     }
+    case Intrinsic::swift_async_context_addr: {
+      SDLoc DL(Node);
+      SDValue Chain = Node->getOperand(0);
+      SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
+      SDValue Res = SDValue(
+          CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
+                                 CurDAG->getTargetConstant(8, DL, MVT::i32),
+                                 CurDAG->getTargetConstant(0, DL, MVT::i32)),
+          0);
+      ReplaceUses(SDValue(Node, 0), Res);
+      ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
+      CurDAG->RemoveDeadNode(Node);
+
+      auto &MF = CurDAG->getMachineFunction();
+      MF.getFrameInfo().setFrameAddressIsTaken(true);
+      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
+      return;
+    }
     }
   } break;
   case ISD::INTRINSIC_WO_CHAIN: {
@@ -4119,18 +4137,6 @@
       if (tryMULLV64LaneV128(IntNo, Node))
        return;
      break;
-    case Intrinsic::swift_async_context_addr: {
-      SDLoc DL(Node);
-      CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64,
-                           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
-                                                  AArch64::FP, MVT::i64),
-                           CurDAG->getTargetConstant(8, DL, MVT::i32),
-                           CurDAG->getTargetConstant(0, DL, MVT::i32));
-      auto &MF = CurDAG->getMachineFunction();
-      MF.getFrameInfo().setFrameAddressIsTaken(true);
-      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
-      return;
-    }
     }
     break;
   }
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27337,27 +27337,6 @@
     }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
-  case Intrinsic::swift_async_context_addr: {
-    auto &MF = DAG.getMachineFunction();
-    auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
-    if (Subtarget.is64Bit()) {
-      MF.getFrameInfo().setFrameAddressIsTaken(true);
-      X86FI->setHasSwiftAsyncContext(true);
-      return SDValue(
-          DAG.getMachineNode(
-              X86::SUB64ri8, dl, MVT::i64,
-              DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64),
-              DAG.getTargetConstant(8, dl, MVT::i32)),
-          0);
-    } else {
-      // 32-bit so no special extended frame, create or reuse an existing stack
-      // slot.
-      if (!X86FI->getSwiftAsyncContextFrameIdx())
-        X86FI->setSwiftAsyncContextFrameIdx(
-            MF.getFrameInfo().CreateStackObject(4, Align(4), false));
-      return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
-    }
-  }
   case Intrinsic::x86_avx512_vp2intersect_q_512:
   case Intrinsic::x86_avx512_vp2intersect_q_256:
   case Intrinsic::x86_avx512_vp2intersect_q_128:
@@ -27737,6 +27716,37 @@
   const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
   if (!IntrData) {
     switch (IntNo) {
+
+    case Intrinsic::swift_async_context_addr: {
+      SDLoc dl(Op);
+      auto &MF = DAG.getMachineFunction();
+      auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      if (Subtarget.is64Bit()) {
+        MF.getFrameInfo().setFrameAddressIsTaken(true);
+        X86FI->setHasSwiftAsyncContext(true);
+        SDValue Chain = Op->getOperand(0);
+        SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64);
+        SDValue Result =
+            SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP,
+                                       DAG.getTargetConstant(8, dl, MVT::i32)),
+                    0);
+        // Return { result, chain }.
+        return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+                           CopyRBP.getValue(1));
+      } else {
+        // 32-bit so no special extended frame, create or reuse an existing
+        // stack slot.
+        if (!X86FI->getSwiftAsyncContextFrameIdx())
+          X86FI->setSwiftAsyncContextFrameIdx(
+              MF.getFrameInfo().CreateStackObject(4, Align(4), false));
+        SDValue Result =
+            DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
+        // Return { result, chain }.
+        return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
+                           Op->getOperand(0));
+      }
+    }
+
     case llvm::Intrinsic::x86_seh_ehregnode:
       return MarkEHRegistrationNode(Op, DAG);
     case llvm::Intrinsic::x86_seh_ehguard:
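In both backends the selection now happens on the chain-carrying form of the node: the incoming chain is operand 0, and the lowering has to hand the output chain back next to the value (ReplaceUses of results 0 and 1 on AArch64, MERGE_VALUES on x86). A condensed sketch of the x86-style pattern follows; the function and parameter names are hypothetical, not code from this patch:

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch: lower a chain-carrying intrinsic that reads a physical register.
// Threading the incoming chain through the CopyFromReg and returning the
// copy's output chain via MERGE_VALUES keeps the node ordered with other
// side-effecting nodes instead of floating free as a pure expression.
static SDValue lowerFrameRelativeIntrinsic(SDValue Op, SelectionDAG &DAG,
                                           Register FrameReg) {
  SDLoc dl(Op);
  SDValue Chain = Op.getOperand(0); // operand 0 of an INTRINSIC_W_CHAIN node
  SDValue Copy = DAG.getCopyFromReg(Chain, dl, FrameReg, MVT::i64);
  // Result 1 of the CopyFromReg is the updated chain; return { value, chain }
  // so the pair matches Op's value list.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Copy,
                     Copy.getValue(1));
}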
diff --git a/llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll b/llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll
new file
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll
@@ -0,0 +1,77 @@
+; RUN: opt < %s -passes='default<O2>' -S | FileCheck --check-prefixes=CHECK %s
+target datalayout = "p:64:64:64"
+
+%swift.async_func_pointer = type <{ i32, i32 }>
+%swift.context = type { %swift.context*, void (%swift.context*)* }
+
+@repoTU = global %swift.async_func_pointer <{ i32 trunc (i64 sub (i64 ptrtoint (void (%swift.context*)* @repo to i64), i64 ptrtoint (%swift.async_func_pointer* @repoTU to i64)) to i32), i32 16 }>, align 8
+
+declare swifttailcc void @callee.0(%swift.context* swiftasync, i8*, i64, i64)
+
+define internal swifttailcc void @callee(i8* %0, i64 %1, i64 %2, %swift.context* %3) {
+entry:
+  musttail call swifttailcc void @callee.0(%swift.context* swiftasync %3, i8* %0, i64 %1, i64 %2)
+  ret void
+}
+
+define swifttailcc void @repo(%swift.context* swiftasync %0) {
+entry:
+  %1 = alloca %swift.context*, align 8
+  %2 = bitcast %swift.context* %0 to <{ %swift.context*, void (%swift.context*)* }>*
+  %3 = call token @llvm.coro.id.async(i32 16, i32 16, i32 0, i8* bitcast (%swift.async_func_pointer* @repoTU to i8*))
+  %4 = call i8* @llvm.coro.begin(token %3, i8* null)
+  store %swift.context* %0, %swift.context** %1, align 8
+
+  ; This context.addr is the address in the frame of the first partial function after splitting.
+  %5 = call i8** @llvm.swift.async.context.addr()
+  store i8* null, i8** %5, align 8
+
+  %6 = call i8* @llvm.coro.async.resume()
+  %7 = call { i8* } (i32, i8*, i8*, ...)
+          @llvm.coro.suspend.async.sl_p0i8s(i32 0,
+          i8* %6,
+          i8* bitcast (i8* (i8*)* @__swift_async_resume_get_context to i8*),
+          i8* bitcast (void (i8*, i64, i64, %swift.context*)* @callee to i8*),
+          i8* %6, i64 0, i64 0, %swift.context* %0)
+  %8 = load %swift.context*, %swift.context** %1, align 8
+  %9 = bitcast %swift.context* %8 to <{ %swift.context*, void (%swift.context*)* }>*
+  %10 = getelementptr inbounds <{ %swift.context*, void (%swift.context*)* }>, <{ %swift.context*, void (%swift.context*)* }>* %9, i32 0, i32 1
+  %11 = load void (%swift.context*)*, void (%swift.context*)** %10, align 8
+  %12 = load %swift.context*, %swift.context** %1, align 8
+  %13 = bitcast void (%swift.context*)* %11 to i8*
+
+  ; This context.addr is the address in the frame of the second partial function after splitting.
+  ; It is not valid to CSE it with the previous call.
+  %14 = call i8** @llvm.swift.async.context.addr()
+  store i8* %13, i8** %14, align 8
+
+  %15 = call i1 (i8*, i1, ...) @llvm.coro.end.async(i8* %4, i1 false, void (i8*, %swift.context*)* @repo.0, i8* %13, %swift.context* %12)
+  unreachable
+}
+
+; Make sure we don't CSE the llvm.swift.async.context.addr calls
+; CHECK: define swifttailcc void @repo
+; CHECK: call i8** @llvm.swift.async.context.addr()
+
+; CHECK: define {{.*}}swifttailcc void @repoTY0_
+; CHECK: call i8** @llvm.swift.async.context.addr()
+
+define internal swifttailcc void @repo.0(i8* %0, %swift.context* %1) #1 {
+entry:
+  %2 = bitcast i8* %0 to void (%swift.context*)*
+  musttail call swifttailcc void %2(%swift.context* swiftasync %1)
+  ret void
+}
+
+define linkonce_odr hidden i8* @__swift_async_resume_get_context(i8* %0) #1 {
+entry:
+  ret i8* %0
+}
+
+declare { i8* } @llvm.coro.suspend.async.sl_p0i8s(i32, i8*, i8*, ...) #1
+declare token @llvm.coro.id.async(i32, i32, i32, i8*) #1
+declare i8* @llvm.coro.begin(token, i8* writeonly) #1
+declare i1 @llvm.coro.end.async(i8*, i1, ...) #1
+declare i8* @llvm.coro.async.resume() #1
+declare i8** @llvm.swift.async.context.addr() #1
+
+attributes #1 = { nounwind }
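For reference, the new test can be run on its own with llvm-lit from a built tree; the build directory name below is an assumption:

<build>/bin/llvm-lit -v llvm/test/Transforms/Coroutines/coro-async-no-cse-swift-async-context-addr.ll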