diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,3 +41,48 @@
   __clear_cache(trampOnStack, &trampOnStack[10]);
 }
 #endif // __powerpc__ && !defined(__powerpc64__)
+
+#if defined(__aarch64__) && !defined(__ANDROID__) && !defined(__APPLE__) && \
+    !defined(_WIN64)
+// Writes four A64 instructions into buf[0..3] that together load the 64-bit
+// value `constant` into register x<reg>: a mov of bits 15:0 followed by three
+// movk instructions, one per remaining 16-bit chunk.
+// NOTE(review): the words are stored in host byte order. A64 instructions are
+// little-endian in memory, so confirm whether big-endian (aarch64_be) builds
+// need a byte swap here.
+static __inline void aarch64_gen_constant(uint32_t *buf, uint64_t constant,
+                                          uint64_t reg) {
+  // Only the low five bits select a register; mask them so that a stray value
+  // cannot clobber the immediate or opcode fields.
+  const uint32_t rd = (uint32_t)(reg & 0x1fu);
+  // mov reg, #constant[15:0]
+  buf[0] = 0xd2800000u | ((uint32_t)((constant >> 0) & 0xffffu) << 5) | rd;
+  // movk reg, #constant[31:16], lsl #16
+  buf[1] = 0xf2a00000u | ((uint32_t)((constant >> 16) & 0xffffu) << 5) | rd;
+  // movk reg, #constant[47:32], lsl #32
+  buf[2] = 0xf2c00000u | ((uint32_t)((constant >> 32) & 0xffffu) << 5) | rd;
+  // movk reg, #constant[63:48], lsl #48
+  buf[3] = 0xf2e00000u | ((uint32_t)((constant >> 48) & 0xffffu) << 5) | rd;
+}
+
+// Fills the caller-provided buffer with a trampoline that loads realFunc into
+// x9 and localsPtr into x18 and then branches to x9. Aborts when the buffer is
+// smaller than the 36 bytes (nine instructions) that the trampoline occupies.
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+                                        uint64_t trampSizeAllocated,
+                                        const void *realFunc, void *localsPtr) {
+  if (trampSizeAllocated < 36) // 9 32-bit instructions.
+    compilerrt_abort();
+
+  // store realFunc in x9
+  aarch64_gen_constant(trampOnStack, (uint64_t)realFunc, 0x09);
+  // store localsPtr in x18
+  aarch64_gen_constant(&trampOnStack[4], (uint64_t)localsPtr, 0x12);
+  // br x9
+  trampOnStack[8] = 0xd61f0120;
+
+  // clear instruction cache
+  __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif /* defined(__aarch64__) && !defined(__ANDROID__) &&
+          !defined(__APPLE__) && !defined(_WIN64) */
diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
--- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
+++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
@@ -7,7 +7,7 @@
 
 /*
  * Tests nested functions
- * The ppc compiler generates a call to __trampoline_setup
+ * The ppc and aarch64 compilers generate a call to __trampoline_setup
  * The i386 and x86_64 compilers generate a call to ___enable_execute_stack
  */
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1130,6 +1130,8 @@
   SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
 
+  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                         SmallVectorImpl<SDNode *> &Created) const override;
   SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -960,6 +960,10 @@
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
   setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
 
+  // llvm.init.trampoline and llvm.adjust.trampoline are custom lowered.
+  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
   // We combine OR nodes for bitfield operations.
   setTargetDAGCombine(ISD::OR);
   // Try to create BICs for vector ANDs.
@@ -5762,6 +5766,52 @@
   return SDValue();
 }
 
+// Lower @llvm.init.trampoline to a call to the function __trampoline_setup().
+//
+// Operands of the INIT_TRAMPOLINE node, as read below: 0 = input chain,
+// 1 = trampoline buffer, 2 = nested function, 3 = 'nest' parameter value.
+// Returns the second element of LowerCallTo's result (the call's out chain).
+SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  // Because executable stacks are needed for trampolines:
+  if (Subtarget->isTargetAndroid() || Subtarget->isTargetDarwin() ||
+      Subtarget->isTargetWindows())
+    report_fatal_error("trampoline intrinsics are "
+                       "not supported on this platform.");
+
+  // Buffer size in bytes passed as the second argument; it matches the
+  // minimum of 36 (nine 4-byte instructions) that __trampoline_setup checks.
+  constexpr uint64_t TrampolineSize = 36;
+
+  SDValue Root = Op.getOperand(0);
+  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDValue FPtr = Op.getOperand(2); // nested function
+  SDValue Nest = Op.getOperand(3); // 'nest' parameter
+
+  SDLoc dl(Op);
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  TargetLowering::ArgListTy Args;
+
+  // Lower to a call to __trampoline_setup(Trmp, TrampolineSize, FPtr, Nest)
+  TargetLowering::ArgListEntry Entry;
+  Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+  Entry.Node = Trmp;
+  Args.push_back(Entry);
+  Entry.Node = DAG.getConstant(TrampolineSize, dl, MVT::i64);
+  Args.push_back(Entry);
+  Entry.Node = FPtr;
+  Args.push_back(Entry);
+  Entry.Node = Nest;
+  Args.push_back(Entry);
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(Root).setLibCallee(
+      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+
+  return LowerCallTo(CLI).second;
+}
+
 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                               SelectionDAG &DAG) const {
   LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6069,6 +6119,10 @@
 
     return Result;
   }
+  case ISD::ADJUST_TRAMPOLINE:
+    return Op.getOperand(0);
+  case ISD::INIT_TRAMPOLINE:
+    return LowerINIT_TRAMPOLINE(Op, DAG);
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -0,0 +1,35 @@
+; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+; UNSUPPORTED: android, darwin, system-windows
+
+declare void @llvm.init.trampoline(i8*, i8*, i8*);
+declare i8* @llvm.adjust.trampoline(i8*);
+
+define i32 @foo(i32* nest %ptr, i32 %val)
+{
+  %x = load i32, i32* %ptr
+  %sum = add i32 %x, %val
+  ret i32 %sum
+}
+
+; CHECK-LABEL: main:
+define i32 @main(i32, i8**)
+{
+  %closure = alloca i32
+  store i32 13, i32* %closure
+  %closure_ptr = bitcast i32* %closure to i8*
+
+  %tramp_buf = alloca [36 x i8], align 4
+  %tramp_ptr = getelementptr [36 x i8], [36 x i8]* %tramp_buf, i32 0, i32 0
+; CHECK: bl __trampoline_setup
+  call void @llvm.init.trampoline(
+          i8* %tramp_ptr,
+          i8* bitcast (i32 (i32*, i32)* @foo to i8*),
+          i8* %closure_ptr)
+  %ptr = call i8* @llvm.adjust.trampoline(i8* %tramp_ptr)
+  %fp = bitcast i8* %ptr to i32(i32)*
+
+  %val2 = call i32 %fp (i32 42)
+
+  ret i32 %val2
+}
+