diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -83,6 +83,8 @@
   bool expandSVESpillFill(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, unsigned Opc,
                           unsigned N);
+  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI);
 };
 
 } // end anonymous namespace
@@ -627,6 +629,57 @@
   return true;
 }
 
+/// Expand a CALL_RVMARKER pseudo (BLR_RVMARKER) into the real call followed
+/// directly by the special `mov x29, x29` marker, and wrap both in a bundle so
+/// that later passes cannot move other code in between.
+///
+/// \param MBB  Block containing the pseudo; the expansion is inserted into it.
+/// \param MBBI Iterator to the pseudo, which is erased by the expansion.
+/// \returns true, as the pseudo is always expanded.
+bool AArch64ExpandPseudo::expandCALL_RVMARKER(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+
+  // Operand 0 is the call target: a global selects a direct BL, a register an
+  // indirect BLR.
+  MachineOperand &CallTarget = MI.getOperand(0);
+  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
+         "invalid operand for regular call");
+  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
+  MachineInstr *OriginalCall =
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
+  OriginalCall->addOperand(CallTarget);
+
+  unsigned RegMaskStartIdx = 1;
+  // Skip register arguments. Those are added during ISel, but are not
+  // needed for the concrete branch.
+  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
+    assert(MI.getOperand(RegMaskStartIdx).isReg() &&
+           "should only skip register operands");
+    RegMaskStartIdx++;
+  }
+  // Copy the register mask and every operand after it onto the real call.
+  for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
+    OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
+
+  // The marker: `ORRXrs $fp, $xzr, $fp, 0`, which the tests in this patch
+  // expect to be printed as `mov x29, x29`.
+  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
+                     .addReg(AArch64::FP)
+                     .addReg(AArch64::XZR)
+                     .addReg(AArch64::FP)
+                     .addImm(0)
+                     .getInstr();
+  // Call-site info describes the call itself, so hand it to the emitted
+  // BL/BLR rather than to the marker, which is a plain move and not a call.
+  if (MI.shouldUpdateCallSiteInfo())
+    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
+  MI.eraseFromParent();
+  finalizeBundle(MBB, OriginalCall->getIterator(),
+                 std::next(Marker->getIterator()));
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true. Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -1014,6 +1067,8 @@
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
   case AArch64::LDR_ZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
+  case AArch64::BLR_RVMARKER:
+    return expandCALL_RVMARKER(MBB, MBBI);
   }
   return false;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -417,7 +417,11 @@
 
   LDP,
   STP,
-  STNP
+  STNP,
+
+  // Pseudo for an OBJC call that gets emitted together with a special `mov
+  // x29, x29` marker instruction.
+ CALL_RVMARKER }; } // end namespace AArch64ISD diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1930,6 +1930,7 @@ MAKE_CASE(AArch64ISD::INDEX_VECTOR) MAKE_CASE(AArch64ISD::UABD) MAKE_CASE(AArch64ISD::SABD) + MAKE_CASE(AArch64ISD::CALL_RVMARKER) } #undef MAKE_CASE return nullptr; @@ -5510,8 +5511,17 @@ return Ret; } + unsigned CallOpc = AArch64ISD::CALL; + // Calls marked with "rv_marker" are special. They should be expanded to the + // call, directly followed by a special marker sequence. Use the CALL_RVMARKER + // to do that. + if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) { + assert(!IsTailCall && "tail calls cannot be marked with rv_marker"); + CallOpc = AArch64ISD::CALL_RVMARKER; + } + // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); InFlag = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -399,6 +399,12 @@ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond, [SDNPHasChain]>; def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz, @@ -2068,6 +2074,8 @@ def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>, Sched<[WriteBrReg]>, PseudoInstExpansion<(BLR GPR64:$Rn)>; + def BLR_RVMARKER : Pseudo<(outs), (ins 
variable_ops), []>, + Sched<[WriteBrReg]>; } // isCall def : Pat<(AArch64call GPR64:$Rn), @@ -2077,6 +2085,10 @@ (BLRNoIP GPR64noip:$Rn)>, Requires<[SLSBLRMitigation]>; +def : Pat<(AArch64call_rvmarker GPR64:$Rn), + (BLR_RVMARKER GPR64:$Rn)>, + Requires<[NoSLSBLRMitigation]>; + let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; } // isBranch, isTerminator, isBarrier, isIndirectBranch diff --git a/llvm/test/CodeGen/AArch64/call-rv-marker.ll b/llvm/test/CodeGen/AArch64/call-rv-marker.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/call-rv-marker.ll @@ -0,0 +1,143 @@ +; RUN: llc -o - %s | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-iphoneos" + +declare i8* @foo0(i32) +declare i8* @foo1() + +declare void @llvm.objc.release(i8*) +declare void @objc_object(i8*) + +declare void @foo2(i8*) + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare %struct.S* @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(1)) + +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + + +%struct.S = type { i8 } + +@g = global i8* null, align 8 +@fptr = global i8* ()* null, align 8 + +define i8* @rv_marker_1() { +; CHECK-LABEL: rv_marker_1: +; CHECK: .cfi_offset w30, -16 +; CHECK-NEXT: bl foo1 +; CHECK-NEXT: mov x29, x29 +; +entry: + %call = call "rv_marker" i8* @foo1() + ret i8* %call +} + +define void @rv_marker_2_select(i32 %c) { +; CHECK-LABEL: rv_marker_2_select: +; CHECK: cinc w0, w8, eq +; CHECK-NEXT: bl foo0 +; CHECK-NEXT: mov x29, x29 +; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: b foo2 +; +entry: + %tobool.not = icmp eq i32 %c, 0 + %.sink = select i1 %tobool.not, i32 2, i32 1 + %call1 = call "rv_marker" i8* @foo0(i32 %.sink) + tail call void @foo2(i8* %call1) + ret void +} + +define void @rv_marker_3() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; 
CHECK-LABEL: rv_marker_3 +; CHECK: .cfi_offset w30, -32 +; CHECK-NEXT: bl foo1 +; CHECK-NEXT: mov x29, x29 +; +entry: + %call = call "rv_marker" i8* @foo1() + invoke void @objc_object(i8* %call) #5 + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + tail call void @llvm.objc.release(i8* %call) + ret void + +lpad: ; preds = %entry + %0 = landingpad { i8*, i32 } + cleanup + tail call void @llvm.objc.release(i8* %call) + resume { i8*, i32 } %0 +} + +define void @rv_marker_4() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-LABEL: rv_marker_4 +; CHECK: .Ltmp3: +; CHECK-NEXT: bl foo1 +; CHECK-NEXT: mov x29, x29 +; CHECK-NEXT: .Ltmp4: +; +entry: + %s = alloca %struct.S, align 1 + %0 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) #2 + %call = invoke "rv_marker" i8* @foo1() + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %entry + invoke void @objc_object(i8* %call) #5 + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: ; preds = %invoke.cont + tail call void @llvm.objc.release(i8* %call) + %call3 = call %struct.S* @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(1) %s) + call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0) + ret void + +lpad: ; preds = %entry + %1 = landingpad { i8*, i32 } + cleanup + br label %ehcleanup + +lpad1: ; preds = %invoke.cont + %2 = landingpad { i8*, i32 } + cleanup + tail call void @llvm.objc.release(i8* %call) + br label %ehcleanup + +ehcleanup: ; preds = %lpad1, %lpad + %.pn = phi { i8*, i32 } [ %2, %lpad1 ], [ %1, %lpad ] + %call4 = call %struct.S* @_ZN1SD1Ev(%struct.S* nonnull dereferenceable(1) %s) + call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0) + resume { i8*, i32 } %.pn +} + +define i8* @rv_marker_5_indirect_call() { +; CHECK-LABEL: rv_marker_5_indirect_call +; CHECK: ldr [[ADDR:x[0-9]+]], [ +; CHECK-NEXT: blr [[ADDR]] +; CHECK-NEXT: mov x29, x29 +; +entry: + %0 = 
load i8* ()*, i8* ()** @fptr, align 8 + %call = call "rv_marker" i8* %0() + tail call void @foo2(i8* %call) + ret i8* %call +} + +declare void @foo(i64, i64, i64) + +define void @rv_marker_multiarg(i64 %a, i64 %b, i64 %c) { +; CHECK-LABEL: rv_marker_multiarg +; CHECK: mov [[TMP:x[0-9]+]], x0 +; CHECK-NEXT: mov x0, x2 +; CHECK-NEXT: mov x2, [[TMP]] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov x29, x29 + call "rv_marker" void @foo(i64 %c, i64 %b, i64 %a) + ret void +} + +declare i32 @__gxx_personality_v0(...) diff --git a/llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir b/llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/expand-blr-rvmarker-pseudo.mir @@ -0,0 +1,21 @@ +# RUN: llc -run-pass=aarch64-expand-pseudo -mtriple=arm64-apple-ios -o - -emit-call-site-info %s | FileCheck %s + +# CHECK-LABEL: test_1_callsite_info +# CHECK: bb.0.entry: +# CHECK-NEXT: BUNDLE implicit-def $lr, implicit-def $w30, implicit-def $sp, implicit-def $wsp, implicit-def dead $x0, implicit $x0, implicit $sp, implicit $fp, implicit $xzr { +# CHECK-NEXT: BLR $x0, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0 +# CHECK-NEXT: ORRXrs $fp, $xzr, $fp, 0 +# CHECK-NEXT: } +# CHECK-NEXT: RET undef $lr, implicit killed $w0 +--- +name: test_1_callsite_info +callSites: + - {bb: 0, offset: 0, fwdArgRegs: + - { arg: 0, reg: '$x0' } } +body: | + bb.0.entry: + liveins: $lr, $x0 + + BLR_RVMARKER $x0, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $x0 + RET_ReallyLR implicit killed $w0 +...