Index: llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -813,8 +813,10 @@ unsigned NumDefs = II.getNumDefs(); const MCPhysReg *ScratchRegs = nullptr; - // Handle STACKMAP and PATCHPOINT specially and then use the generic code. - if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { + // Handle STACKMAP, PATCHPOINT and STATEPOINT specially and then use the + // generic code. + if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT || + Opc == TargetOpcode::STATEPOINT) { // Stackmaps do not have arguments and do not preserve their calling // convention. However, to simplify runtime support, they clobber the same // scratch registers as AnyRegCC. @@ -822,6 +824,10 @@ if (Opc == TargetOpcode::PATCHPOINT) { CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); NumDefs = NumResults; + // } else if (Opc == TargetOpcode::STATEPOINT) { + // CC = Node->getConstantOperandVal( + // Node->getConstantOperandVal(StatepointOpers::NCallArgsPos) + + // StatepointOpers::MetaEnd + StatepointOpers::CCOffset); } ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); } Index: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -94,6 +94,8 @@ const MachineInstr &MI); void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, const MachineInstr &MI); + void LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); @@ -441,8 +443,8 @@ // linker can safely perform dead code stripping. Since LLVM never // generates code that does this, it is always safe to set. 
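+    // Note that emitStackMaps is hoisted out of this Mach-O-only block below:
+    // the .llvm_stackmaps section must be emitted for every object format,
+    // and the new statepoint tests target ELF (aarch64-unknown-linux-gnu),
+    // which would otherwise get no stack map section at all.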
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - emitStackMaps(SM); } + emitStackMaps(SM); } void AArch64AsmPrinter::EmitLOHs() { @@ -774,6 +776,7 @@ while (NumNOPBytes > 0) { if (MII == MBB.end() || MII->isCall() || MII->getOpcode() == AArch64::DBG_VALUE || + MII->getOpcode() == TargetOpcode::STATEPOINT || MII->getOpcode() == TargetOpcode::PATCHPOINT || MII->getOpcode() == TargetOpcode::STACKMAP) break; @@ -828,6 +831,44 @@ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); } +void AArch64AsmPrinter::LowerSTATEPOINT(MCStreamer &OutStreamer, StackMaps &SM, + const MachineInstr &MI) { + StatepointOpers SOpers(&MI); + if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { + assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + for (unsigned i = 0; i < PatchBytes; i += 4) + EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0)); + } else { + // Lower call target and choose correct opcode + const MachineOperand &CallTarget = SOpers.getCallTarget(); + MCOperand CallTargetMCOp; + unsigned CallOpcode; + switch (CallTarget.getType()) { + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + MCInstLowering.lowerOperand(CallTarget, CallTargetMCOp); + CallOpcode = AArch64::BL; + break; + case MachineOperand::MO_Immediate: + CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); + CallOpcode = AArch64::BL; + break; + case MachineOperand::MO_Register: + CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); + CallOpcode = AArch64::BLR; + break; + default: + llvm_unreachable("Unsupported operand type in statepoint call target"); + break; + } + + EmitToStreamer(OutStreamer, + MCInstBuilder(CallOpcode).addOperand(CallTargetMCOp)); + } + + SM.recordStatepoint(MI); +} + void AArch64AsmPrinter::EmitFMov0(const MachineInstr &MI) { unsigned DestReg = MI.getOperand(0).getReg(); if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround()) { @@ -1070,6 +1111,9 @@ case TargetOpcode::PATCHPOINT: return LowerPATCHPOINT(*OutStreamer, SM, *MI); + case TargetOpcode::STATEPOINT: + return LowerSTATEPOINT(*OutStreamer, SM, *MI); + case TargetOpcode::PATCHABLE_FUNCTION_ENTER: LowerPATCHABLE_FUNCTION_ENTER(*MI); return; Index: llvm/lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2221,17 +2221,23 @@ .addImm(0); } -/// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before -/// the update. This is easily retrieved as it is exactly the offset that is set -/// in processFunctionBeforeFrameFinalized. int AArch64FrameLowering::getFrameIndexReferencePreferSP( const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const { - const MachineFrameInfo &MFI = MF.getFrameInfo(); - LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " - << MFI.getObjectOffset(FI) << "\n"); - FrameReg = AArch64::SP; - return MFI.getObjectOffset(FI); + if (IgnoreSPUpdates) { + /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP + /// before the update. This is easily retrieved as it is exactly the offset + /// that is set in processFunctionBeforeFrameFinalized. 
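+    /// When IgnoreSPUpdates is not set, we instead defer to
+    /// resolveFrameIndexReference below, which chooses between FP and SP for
+    /// the base register in the usual way.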
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
+    LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
+                      << MFI.getObjectOffset(FI) << "\n");
+    FrameReg = AArch64::SP;
+    return MFI.getObjectOffset(FI);
+  } else {
+    return resolveFrameIndexReference(MF, FI, FrameReg,
+                                      /*PreferFP=*/false, /*ForSimm=*/false)
+        .getBytes();
+  }
 }

 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1383,6 +1383,7 @@
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
+  case TargetOpcode::STATEPOINT:
     return emitPatchPoint(MI, BB);
   case AArch64::CATCHRET:
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -108,6 +108,13 @@
     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
     break;
+  case TargetOpcode::STATEPOINT:
+    NumBytes = StatepointOpers(&MI).getNumPatchBytes();
+    assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+    // No patch bytes means a normal call instruction is emitted.
+    if (NumBytes == 0)
+      NumBytes = 4;
+    break;
   case AArch64::TLSDESC_CALLSEQ:
     // This gets lowered to an instruction sequence which takes 16 bytes
     NumBytes = 16;
Index: llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -455,7 +455,8 @@
   // Special handling of dbg_value, stackmap and patchpoint instructions.
   if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
-      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+      MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+      MI.getOpcode() == TargetOpcode::STATEPOINT) {
     StackOffset Offset =
         TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
                                         /*PreferFP=*/true,
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -182,6 +182,10 @@
     if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
       return TTI::TCC_Free;
     break;
+  case Intrinsic::experimental_gc_statepoint:
+    if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+      return TTI::TCC_Free;
+    break;
   }
   return AArch64TTIImpl::getIntImmCost(Imm, Ty);
 }
Index: llvm/test/CodeGen/AArch64/fast-isel-gc-intrinsics.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/fast-isel-gc-intrinsics.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -fast-isel

+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+; Don't crash with GC intrinsics.

+; A gc.relocate call should not end up in a machine basic block by itself.
+define i8 addrspace(1)* @test_gcrelocate(i8 addrspace(1)* %v) gc "statepoint-example" {
+entry:
+  %tok = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %v)
+  %vnew = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %tok, i32 7, i32 7)
+  ret i8 addrspace(1)* %vnew
+}

+; gc.result calls are fine in their own blocks.
+define i1 @test_gcresult() gc "statepoint-example" {
+entry:
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  br label %exit
+exit:
+  ret i1 %call1
+}

+; We are okay here because we see the gc.relocate and avoid generating a
+; separate block for it.
+define i1 @test_gcresult_gcrelocate(i8 addrspace(1)* %v) gc "statepoint-example" {
+entry:
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %v)
+  %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  %vnew = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7)
+  br label %exit
+exit:
+  ret i1 %call1
+}

+define i8 addrspace(1)* @test_non_entry_block(i8 addrspace(1)* %v, i8 %val) gc "statepoint-example" {
+entry:
+  %load = load i8, i8 addrspace(1)* %v
+  %cmp = icmp eq i8 %load, %val
+  br i1 %cmp, label %func_call, label %exit

+func_call:
+  call void @dummy()
+  %tok = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %v)
+  %vnew = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %tok, i32 7, i32 7)
+  ret i8 addrspace(1)* %vnew

+exit:
+  ret i8 addrspace(1)* %v

+}

+declare void @dummy()
+declare void @foo()

+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.i1(token)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
Index: llvm/test/CodeGen/AArch64/statepoint-allocas.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/statepoint-allocas.ll
@@ -0,0 +1,156 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; Check that we can lower a use of an alloca both as a deopt value (where the
+; exact meaning is up to the consumer of the stackmap) and as an explicit spill
+; slot used for GC.

+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"

+declare zeroext i1 @return_i1()

+; Can we handle an explicit relocation slot (in the form of an alloca) given
+; to the statepoint?
+define i32 addrspace(1)* @test(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: bl return_i1 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: ldr x0, [sp, #8] +; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: ret +entry: + %alloca = alloca i32 addrspace(1)*, align 8 + store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca + call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)** %alloca) + %rel = load i32 addrspace(1)*, i32 addrspace(1)** %alloca + ret i32 addrspace(1)* %rel +} + +; Can we handle an alloca as a deopt value? +define i32 addrspace(1)* @test2(i32 addrspace(1)* %ptr) gc "statepoint-example" { +; CHECK-LABEL: test2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: bl return_i1 +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: ret +entry: + %alloca = alloca i32 addrspace(1)*, align 8 + store i32 addrspace(1)* %ptr, i32 addrspace(1)** %alloca + call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 1, i32 addrspace(1)** %alloca) + ret i32 addrspace(1)* null +} + +declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) + + +; CHECK-LABEL: .section .llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; Header +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 0 +; Num Functions +; CHECK-NEXT: .word 2 +; Num LargeConstants +; CHECK-NEXT: .word 0 +; Num Callsites +; CHECK-NEXT: .word 2 + +; Functions and stack size +; CHECK-NEXT: .xword test +; CHECK-NEXT: .xword 16 +; CHECK-NEXT: .xword 1 +; CHECK-NEXT: .xword test2 +; CHECK-NEXT: .xword 16 +; CHECK-NEXT: .xword 1 + +; Large Constants +; Statepoint ID only +; CHECK: .xword 0 + +; Callsites +; The GC one +; CHECK: .word .Ltmp0-test +; CHECK: .hword 0 +; CHECK: .hword 4 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .word 0 +; Direct Spill Slot [SP+8] +; CHECK: .byte 2 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK: .hword 0 +; CHECK: .word 8 +; No Padding or LiveOuts +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .p2align 3 + +; The Deopt one +; CHECK: .word .Ltmp1-test2 +; CHECK: .hword 0 +; CHECK: .hword 4 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .word 0 +; SmallConstant (1) +; CHECK: .byte 4 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .word 1 +; Direct Spill Slot [SP+8] +; CHECK: .byte 2 +; CHECK: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK: .hword 0 +; CHECK: .word 8 + +; No Padding or LiveOuts +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .p2align 3 Index: llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll 
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
@@ -0,0 +1,250 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; This file contains a collection of basic tests to ensure we didn't
+; screw up normal call lowering when there are no deopt or gc arguments.

+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"

+%struct = type { i64, i64 }

+declare zeroext i1 @return_i1()
+declare zeroext i32 @return_i32()
+declare i32* @return_i32ptr()
+declare float @return_float()
+declare %struct @return_struct()
+declare void @varargf(i32, ...)

+define i1 @test_i1_return() gc "statepoint-example" {
+; CHECK-LABEL: test_i1_return:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl return_i1
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: and w0, w0, #0x1
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+; This is just checking that an i1 gets lowered normally when there are no
+; extra state arguments to the statepoint.
+entry:
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  ret i1 %call1
+}

+define i32 @test_i32_return() gc "statepoint-example" {
+; CHECK-LABEL: test_i32_return:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl return_i32
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+entry:
+  %safepoint_token = tail call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 0, i32 0, i32 ()* @return_i32, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
+  ret i32 %call1
+}

+define i32* @test_i32ptr_return() gc "statepoint-example" {
+; CHECK-LABEL: test_i32ptr_return:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl return_i32ptr
+; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+entry:
+  %safepoint_token = tail call token (i64, i32, i32* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p0i32f(i64 0, i32 0, i32* ()* @return_i32ptr, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call i32* @llvm.experimental.gc.result.p0i32(token %safepoint_token)
+  ret i32* %call1
+}

+define float @test_float_return() gc "statepoint-example" {
+; CHECK-LABEL: test_float_return:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl return_float
+; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+entry:
+  %safepoint_token = tail call token (i64, i32, float ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32f(i64 0, i32 0, float ()* @return_float, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token)
+  ret float %call1
+}

+define %struct @test_struct_return() gc "statepoint-example" {
+; CHECK-LABEL: test_struct_return:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl return_struct
+; CHECK-NEXT: .Ltmp4:
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+entry:
+  %safepoint_token = tail call token (i64, i32, %struct ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_structf(i64 0, i32 0, %struct ()* @return_struct, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token)
+  ret %struct %call1
+}

+define i1 @test_relocate(i32 addrspace(1)* %a) gc "statepoint-example" {
+; CHECK-LABEL: test_relocate:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str x0, [sp, #8]
+; CHECK-NEXT: bl return_i1
+; CHECK-NEXT: .Ltmp5:
+; CHECK-NEXT: and w0, w0, #0x1
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+; Check that an unused relocate has no code-generation impact
+entry:
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a)
+  %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7)
+  %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  ret i1 %call2
+}

+define void @test_void_vararg() gc "statepoint-example" {
+; CHECK-LABEL: test_void_vararg:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w0, #42
+; CHECK-NEXT: mov w1, #43
+; CHECK-NEXT: bl varargf
+; CHECK-NEXT: .Ltmp6:
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+; Check that a statepoint wrapping a *void*-returning vararg function works
+entry:
+  %safepoint_token = tail call token (i64, i32, void (i32, ...)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64 0, i32 0, void (i32, ...)* @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0)
+  ;; if we try to use the result from a statepoint wrapping a
+  ;; non-void-returning varargf, we will experience a crash.
+  ret void
+}

+define i1 @test_i1_return_patchable() gc "statepoint-example" {
+; CHECK-LABEL: test_i1_return_patchable:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: nop
+; CHECK-NEXT: .Ltmp7:
+; CHECK-NEXT: and w0, w0, #0x1
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+; A patchable variant of test_i1_return
+entry:
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 4, i1 ()* null, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  ret i1 %call1
+}

+declare void @consume(i32 addrspace(1)* %obj)

+define i1 @test_cross_bb(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint-example" {
+; CHECK-LABEL: test_cross_bb:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x20, [sp, #-32]!
+; CHECK-NEXT: stp x19, x30, [sp, #16] +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w20, -32 +; CHECK-NEXT: mov w20, w1 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: bl return_i1 +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: tbz w20, #0, .LBB8_2 +; CHECK-NEXT: // %bb.1: // %left +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: ldr x0, [sp, #8] +; CHECK-NEXT: bl consume +; CHECK-NEXT: and w0, w19, #0x1 +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: // %right +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: .LBB8_3: // %right +; CHECK-NEXT: ldp x19, x30, [sp, #16] +; CHECK-NEXT: ldr x20, [sp], #32 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %a) + br i1 %external_cond, label %left, label %right + +left: + %call1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + call void @consume(i32 addrspace(1)* %call1) + ret i1 %call2 + +right: + ret i1 true +} + +%struct2 = type { i64, i64, i64 } + +declare void @consume_attributes(i32, i8* nest, i32, %struct2* byval) + +define void @test_attributes(%struct2* byval %s) gc "statepoint-example" { +; CHECK-LABEL: test_attributes: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: ldr x8, [sp, #64] +; CHECK-NEXT: ldr q0, [sp, #48] +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: mov w1, #17 +; CHECK-NEXT: mov x18, xzr +; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bl consume_attributes +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: ldr x30, [sp, #32] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: +; Check that arguments with attributes are lowered correctly. +; We call a function that has a nest argument and a byval argument. + %statepoint_token = call token (i64, i32, void (i32, i8*, i32, %struct2*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi32p0i8i32p0s_struct2sf(i64 0, i32 0, void (i32, i8*, i32, %struct2*)* @consume_attributes, i32 4, i32 0, i32 42, i8* nest null, i32 17, %struct2* byval %s, i32 0, i32 0) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) +declare i1 @llvm.experimental.gc.result.i1(token) + +declare token @llvm.experimental.gc.statepoint.p0f_i32f(i64, i32, i32 ()*, i32, i32, ...) +declare i32 @llvm.experimental.gc.result.i32(token) + +declare token @llvm.experimental.gc.statepoint.p0f_p0i32f(i64, i32, i32* ()*, i32, i32, ...) +declare i32* @llvm.experimental.gc.result.p0i32(token) + +declare token @llvm.experimental.gc.statepoint.p0f_f32f(i64, i32, float ()*, i32, i32, ...) +declare float @llvm.experimental.gc.result.f32(token) + +declare token @llvm.experimental.gc.statepoint.p0f_structf(i64, i32, %struct ()*, i32, i32, ...) +declare %struct @llvm.experimental.gc.result.struct(token) + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32varargf(i64, i32, void (i32, ...)*, i32, i32, ...) + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidi32p0i8i32p0s_struct2sf(i64, i32, void (i32, i8*, i32, %struct2*)*, i32, i32, ...) 
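+
+; For reference, every statepoint call in this file follows the operand layout
+; (ID, num patch bytes, call target, num call args, flags, call args...,
+; num transition args, num deopt args, deopt args..., gc pointers...), so the
+; trailing "i32 0, i32 0" pairs above are empty transition and deopt sections.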
+
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
Index: llvm/test/CodeGen/AArch64/statepoint-forward.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/statepoint-forward.ll
@@ -0,0 +1,106 @@
+; RUN: opt -O3 -S < %s | FileCheck --check-prefix=CHECK-OPT %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LLC %s
+; These tests are targeted at making sure we don't retain information
+; about memory which contains potential gc references across a statepoint.
+; They're carefully written to only outlaw forwarding of references.
+; Depending on the collector, forwarding non-reference fields or
+; constant null references may be perfectly legal (but is unimplemented here).
+; The general structure of these tests is:
+; - learn a fact about memory (via an assume)
+; - cross a statepoint
+; - check the same fact about memory (which we no longer know)

+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"

+; If not at a statepoint, we could forward known memory values
+; across this call.
+declare void @func() readonly

+;; Forwarding the value of a pointer load is invalid since it may have
+;; changed at the safepoint. Forwarding a non-gc pointer value would
+;; be valid, but is not currently implemented.
+define i1 @test_load_forward(i32 addrspace(1)* addrspace(1)* %p) gc "statepoint-example" {
+entry:
+  %before = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p
+  %cmp1 = call i1 @f(i32 addrspace(1)* %before)
+  call void @llvm.assume(i1 %cmp1)
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(token %safepoint_token, i32 7, i32 7)
+  %after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
+  %cmp2 = call i1 @f(i32 addrspace(1)* %after)
+  ret i1 %cmp2

+; CHECK-OPT-LABEL: test_load_forward
+; CHECK-OPT: ret i1 %cmp2
+; CHECK-LLC-LABEL: test_load_forward
+; CHECK-LLC: bl f
+}

+;; Same as above, but forwarding from a store
+define i1 @test_store_forward(i32 addrspace(1)* addrspace(1)* %p,
+                              i32 addrspace(1)* %v) gc "statepoint-example" {
+entry:
+  %cmp1 = call i1 @f(i32 addrspace(1)* %v)
+  call void @llvm.assume(i1 %cmp1)
+  store i32 addrspace(1)* %v, i32 addrspace(1)* addrspace(1)* %p
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(token %safepoint_token, i32 7, i32 7)
+  %after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
+  %cmp2 = call i1 @f(i32 addrspace(1)* %after)
+  ret i1 %cmp2

+; CHECK-OPT-LABEL: test_store_forward
+; CHECK-OPT: ret i1 %cmp2
+; CHECK-LLC-LABEL: test_store_forward
+; CHECK-LLC: bl f
+}

+; A predicate on the pointer which is not simply null, but whose value
+; would be known unchanged if the pointer value could be forwarded.
+; The implementation of such a function could inspect the integral value
+; of the pointer and is thus not safe to reuse after a statepoint.
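+; (For instance, it might hash the object's address, which a moving collector
+; is free to change at any safepoint.)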
+declare i1 @f(i32 addrspace(1)* %v) readnone + +; This is a variant of the test_load_forward test which is intended to +; highlight the fact that a gc pointer can be stored in part of the heap +; that is not itself GC managed. The GC may have an external mechanism +; to know about and update that value at a safepoint. Note that the +; statepoint does not provide the collector with this root. +define i1 @test_load_forward_nongc_heap(i32 addrspace(1)** %p) gc "statepoint-example" { +entry: + %before = load i32 addrspace(1)*, i32 addrspace(1)** %p + %cmp1 = call i1 @f(i32 addrspace(1)* %before) + call void @llvm.assume(i1 %cmp1) + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) + %after = load i32 addrspace(1)*, i32 addrspace(1)** %p + %cmp2 = call i1 @f(i32 addrspace(1)* %after) + ret i1 %cmp2 + +; CHECK-OPT-LABEL: test_load_forward_nongc_heap +; CHECK-OPT: ret i1 %cmp2 +; CHECK-LLC-LABEL: test_load_forward_nongc_heap +; CHECK-LLC: bl f +} + +;; Same as above, but forwarding from a store +define i1 @test_store_forward_nongc_heap(i32 addrspace(1)** %p, + i32 addrspace(1)* %v) gc "statepoint-example" { +entry: + %cmp1 = call i1 @f(i32 addrspace(1)* %v) + call void @llvm.assume(i1 %cmp1) + store i32 addrspace(1)* %v, i32 addrspace(1)** %p + call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) + %after = load i32 addrspace(1)*, i32 addrspace(1)** %p + %cmp2 = call i1 @f(i32 addrspace(1)* %after) + ret i1 %cmp2 + +; CHECK-OPT-LABEL: test_store_forward_nongc_heap +; CHECK-OPT: ret i1 %cmp2 +; CHECK-LLC-LABEL: test_store_forward_nongc_heap +; CHECK-LLC: bl f +} + +declare void @llvm.assume(i1) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) +declare i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(token, i32, i32) #3 Index: llvm/test/CodeGen/AArch64/statepoint-invoke.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/statepoint-invoke.ll @@ -0,0 +1,265 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s 2>&1 | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +declare void @"some_call"(i64 addrspace(1)*) +declare i64 addrspace(1)* @"some_other_call"(i64 addrspace(1)*) + +declare i32 @"personality_function"() + +define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj, +; CHECK-LABEL: test_basic: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x0, x30, [sp, #8] +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str x1, [sp, #24] +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: bl some_call +; CHECK-NEXT: .Ltmp18: +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: // %bb.1: // %invoke_safepoint_normal_dest +; CHECK-NEXT: ldr x0, [sp, #8] +; CHECK-NEXT: .LBB0_2: // %invoke_safepoint_normal_dest +; CHECK-NEXT: ldr x30, [sp, #16] +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: // %exceptional_return +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: ldr x0, [sp, #24] +; CHECK-NEXT: b .LBB0_2 + i64 addrspace(1)* %obj1) +gc "statepoint-example" personality i32 ()* @"personality_function" { +entry: + %0 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) + to label %invoke_safepoint_normal_dest unwind label %exceptional_return + +invoke_safepoint_normal_dest: + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 13, i32 13) + %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %0, i32 14, i32 14) + br label %normal_return + +normal_return: + ret i64 addrspace(1)* %obj.relocated + +exceptional_return: + %landing_pad = landingpad token + cleanup + %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13) + %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 14, i32 14) + ret i64 addrspace(1)* %obj1.relocated1 +} +; CHECK-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK: .byte 0 +; CHECK: .p2align 2 + +define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, +; CHECK-LABEL: test_result: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: bl some_other_call +; CHECK-NEXT: .Ltmp19: +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: // %bb.1: // %normal_return +; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %exceptional_return +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: ldr x0, [sp, #8] +; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: ret + i64 addrspace(1)* %obj1) + gc "statepoint-example" personality i32 ()* @personality_function { +entry: + %0 = invoke token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 0, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @some_other_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) + to label %normal_return unwind label %exceptional_return + +normal_return: + %ret_val = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token %0) + ret i64 addrspace(1)* %ret_val + +exceptional_return: + %landing_pad = landingpad token + cleanup + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13) + ret i64 addrspace(1)* %obj.relocated +} +; CHECK-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK: .uleb128 .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK: .byte 0 +; CHECK: .p2align 2 + +define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) +; CHECK-LABEL: test_same_val: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x19, x30, [sp, #16] +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: mov w19, w0 +; CHECK-NEXT: tbz w0, #0, .LBB2_3 +; CHECK-NEXT: // %bb.1: // %left +; CHECK-NEXT: stp x1, x2, [sp] +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: bl some_call +; CHECK-NEXT: .Ltmp20: +; CHECK-NEXT: .Ltmp10: +; CHECK-NEXT: // %bb.2: // %left.relocs +; CHECK-NEXT: ldp x8, x9, [sp] +; CHECK-NEXT: b .LBB2_5 +; CHECK-NEXT: .LBB2_3: // %right +; CHECK-NEXT: stp x2, x3, [sp] +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: bl some_call +; CHECK-NEXT: .Ltmp21: +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: // %bb.4: // %right.relocs +; CHECK-NEXT: ldp x9, x8, [sp] +; CHECK-NEXT: .LBB2_5: // %normal_return +; CHECK-NEXT: tst w19, #0x1 +; CHECK-NEXT: csel x0, x8, x9, ne +; CHECK-NEXT: .LBB2_6: // %normal_return +; CHECK-NEXT: ldp x19, x30, [sp, #16] +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_7: // %exceptional_return.right +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: ldr x0, [sp] +; CHECK-NEXT: b .LBB2_6 +; CHECK-NEXT: .LBB2_8: // %exceptional_return.left +; CHECK-NEXT: .Ltmp11: +; CHECK-NEXT: ldr x0, [sp] +; CHECK-NEXT: b .LBB2_6 + gc "statepoint-example" personality i32 ()* @"personality_function" { +entry: + br i1 %cond, label %left, label %right + +left: + %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2) + to label %left.relocs unwind label %exceptional_return.left + +left.relocs: + %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 13, i32 13) + %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 14, i32 14) + br label %normal_return + +right: + %sp2 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) + to label %right.relocs unwind label %exceptional_return.right + +right.relocs: + %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 13, i32 13) + %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp2, i32 14, i32 14) + br label %normal_return + +normal_return: + %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs] + %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs] + %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2 + ret i64 addrspace(1)* %ret + +exceptional_return.left: + %landing_pad = landingpad token + cleanup + %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13) + ret i64 addrspace(1)* %val.relocated2 + +exceptional_return.right: + %landing_pad1 = landingpad token + cleanup + %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad1, i32 13, i32 13) + ret i64 addrspace(1)* %val.relocated3 +} + +define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1) +; CHECK-LABEL: test_null_undef: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .Ltmp12: +; CHECK-NEXT: bl some_call +; CHECK-NEXT: .Ltmp22: +; CHECK-NEXT: .Ltmp13: +; CHECK-NEXT: .LBB3_1: // %normal_return +; CHECK-NEXT: mov x0, xzr +; CHECK-NEXT: ldr x30, [sp], #16 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %exceptional_return +; CHECK-NEXT: .Ltmp14: +; CHECK-NEXT: b .LBB3_1 + gc "statepoint-example" personality i32 ()* @"personality_function" { +entry: + %sp1 = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef) + to label %normal_return unwind label %exceptional_return + +normal_return: + %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 13, i32 13) + %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %sp1, i32 14, i32 14) + ret i64 addrspace(1)* %null.relocated + +exceptional_return: + %landing_pad = landingpad token + cleanup + %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 13, i32 13) + %undef.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 14, i32 14) + ret i64 addrspace(1)* %null.relocated2 +} + +define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1) +; CHECK-LABEL: test_alloca_and_const: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .Ltmp15:
+; CHECK-NEXT: bl some_call
+; CHECK-NEXT: .Ltmp23:
+; CHECK-NEXT: .Ltmp16:
+; CHECK-NEXT: // %bb.1: // %normal_return
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB4_2: // %exceptional_return
+; CHECK-NEXT: .Ltmp17:
+; CHECK-NEXT: mov w0, #15
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+  gc "statepoint-example" personality i32 ()* @"personality_function" {
+entry:
+  %a = alloca i32
+  %aa = addrspacecast i32* %a to i32 addrspace(1)*
+  %c = inttoptr i64 15 to i64 addrspace(1)*
+  %sp = invoke token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %aa, i64 addrspace(1)* %c)
+           to label %normal_return unwind label %exceptional_return

+normal_return:
+  %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %sp, i32 13, i32 13)
+  %aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)*
+  ret i64 addrspace(1)* %aa.converted

+exceptional_return:
+  %landing_pad = landingpad token
+          cleanup
+  %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token %landing_pad, i32 14, i32 14)
+  ret i64 addrspace(1)* %aa.rel2
+}

+declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
+declare token @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...)

+declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(token, i32, i32)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
+declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token)
Index: llvm/test/CodeGen/AArch64/statepoint-stack-usage.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/statepoint-stack-usage.ll
@@ -0,0 +1,136 @@
+; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 < %s | FileCheck %s

+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"

+; This test is checking to make sure that we reuse the same stack slots
+; for GC values spilled over two different call sites. Since the order
+; of GC arguments differs, naive lowering code would insert loads and
+; stores to rearrange items on the stack. We need to make sure (for
+; performance) that this doesn't happen.
+define i32 @back_to_back_calls(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" {
+; CHECK-LABEL: back_to_back_calls
+; The exact stores don't matter, but there need to be three stack slots created
+; CHECK-DAG: stp x2, x30, [sp, #8]
+; CHECK-DAG: str x0, [sp, #24]
+; CHECK-DAG: str x1, [sp]
+; There should be no more than three moves
+; CHECK-NOT: str
+; CHECK-NOT: stp
+  %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...)
@llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) + %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12) + %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13) + %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 14) +; CHECK: blr +; This is the key check. There should NOT be any memory moves here +; CHECK-NOT: stp +; CHECK-NOT: str + %safepoint_token2 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1) + %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 14) + %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 13) + %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 12) +; CHECK: blr + ret i32 1 +} + +; This test simply checks that minor changes in vm state don't prevent slots +; being reused for gc values. +define i32 @reserve_first(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" { +; CHECK-LABEL: reserve_first +; The exact stores don't matter, but there need to be three stack slots created +; CHECK-DAG: stp x2, x30, [sp, #8] +; CHECK-DAG: str x0, [sp, #24] +; CHECK-DAG: str x1, [sp] + %safepoint_token = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) + %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12) + %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13) + %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 14) +; CHECK: blr +; This is the key check. There should NOT be any memory moves here +; CHECK-NOT: stp +; CHECK-NOT: str + %safepoint_token2 = tail call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 addrspace(1)* %a1, i32 0, i32 addrspace(1)* %c1, i32 0, i32 0, i32 addrspace(1)* %c1, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1) + %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 14) + %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 13) + %c2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 12) +; CHECK: blr + ret i32 1 +} + +; Check that we reuse the same stack slot across multiple calls. The use of +; more than two calls here is critical. We've had a bug which allowed reuse +; exactly once which went undetected for a long time. 
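+; Unlike the tests above, the spilled values here are plain i32 deopt state
+; rather than gc pointers, so slot reuse is exercised for deopt entries too.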
+define i32 @back_to_back_deopt(i32 %a, i32 %b, i32 %c) #1
+  gc "statepoint-example" {
+; CHECK-LABEL: back_to_back_deopt
+; The exact stores don't matter, but there need to be three stack slots created
+; CHECK-DAG: stp w1, w0, [sp, #8]
+; CHECK-DAG: str w2, [sp, #4]
+; CHECK: blr
+; CHECK-DAG: stp w20, w21, [sp, #8]
+; CHECK-DAG: str w19, [sp, #4]
+; CHECK: blr
+; CHECK-DAG: stp w20, w21, [sp, #8]
+; CHECK-DAG: str w19, [sp, #4]
+; CHECK: blr
+; CHECK-DAG: stp w20, w21, [sp, #8]
+; CHECK-DAG: str w19, [sp, #4]
+; CHECK: blr
+  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c)
+  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c)
+  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c)
+  call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 3, i32 %a, i32 %b, i32 %c)
+  ret i32 1
+}

+; Test that stack slots are reused for invokes
+define i32 @back_to_back_invokes(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c) #1 gc "statepoint-example" personality i32 ()* @"personality_function" {
+; CHECK-LABEL: back_to_back_invokes
+entry:
+  ; The exact stores don't matter, but there need to be three stack slots created
+  ; CHECK-DAG: stp x2, x30, [sp, #8]
+  ; CHECK-DAG: str x0, [sp, #24]
+  ; CHECK-DAG: str x1, [sp]
+  ; CHECK: blr
+  %safepoint_token = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 addrspace(1)* %c)
+                    to label %normal_return unwind label %exceptional_return

+normal_return:
+  %a1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 12)
+  %b1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 13)
+  %c1 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 12, i32 14)
+  ; Should work even through bitcasts
+  %c1.casted = bitcast i32 addrspace(1)* %c1 to i8 addrspace(1)*
+  ; This is the key check. There should NOT be any memory moves here
+  ; CHECK-NOT: stp
+  ; CHECK-NOT: str
+  ; CHECK: blr
+  %safepoint_token2 = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* undef, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %c1.casted, i32 addrspace(1)* %b1, i32 addrspace(1)* %a1)
+                     to label %normal_return2 unwind label %exceptional_return2

+normal_return2:
+  %a2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 14)
+  %b2 = tail call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 12, i32 13)
+  %c2 = tail call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token2, i32 12, i32 12)
+  ret i32 1

+exceptional_return:
+  %landing_pad = landingpad { i8*, i32 }
+          cleanup
+  ret i32 0

+exceptional_return2:
+  %landing_pad2 = landingpad { i8*, i32 }
+          cleanup
+  ret i32 0
+}

+; Function Attrs: nounwind
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) #3

+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)

+declare i32 @"personality_function"()

+attributes #1 = { uwtable }
Index: llvm/test/CodeGen/AArch64/statepoint-stackmap-format.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/statepoint-stackmap-format.ll
@@ -0,0 +1,504 @@
+; RUN: llc < %s -verify-machineinstrs -stack-symbol-ordering=0 -mtriple="aarch64-unknown-linux-gnu" | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stack-symbol-ordering=0 -mtriple="aarch64-unknown-unknown-elf" | FileCheck %s

+; This test is a sanity check to ensure statepoints are generating StackMap
+; sections correctly. This is not intended to be a rigorous test of the
+; StackMap format (see the stackmap tests for that).

+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"

+declare zeroext i1 @return_i1()

+define i1 @test(i32 addrspace(1)* %ptr_base, i32 %arg)
+  gc "statepoint-example" {
+; CHECK-LABEL: test:
+; Do we see two spills for the local values and the store to the
+; alloca?
+; CHECK: sub sp, sp, #48
+; CHECK: stp x30, x0, [sp, #32]
+; CHECK: stp x8, xzr, [sp, #8]
+; CHECK: bl return_i1
+; CHECK: add sp, sp, #48
+; CHECK: ret
+entry:
+  %metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
+  store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
+  %ptr_derived = getelementptr i32, i32 addrspace(1)* %ptr_base, i32 %arg
+  %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null)
+  %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token)
+  %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 9)
+  %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 10)
+  %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 11, i32 11)
+;
+  ret i1 %call1
+}

+; This is similar to the previous test except that we have a derived pointer
+; as an argument to the function. Although this cannot happen after the
+; RewriteStatepointsForGC pass, lowering should be able to handle it anyway.
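+; (A derived pointer is one computed from a base gc pointer, e.g. via
+; getelementptr; the statepoint records base and derived values together so
+; the collector can rewrite both consistently.)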
+define i1 @test_derived_arg(i32 addrspace(1)* %ptr_base, + i32 addrspace(1)* %ptr_derived) + gc "statepoint-example" { +; CHECK-LABEL: test_derived_arg +; Do we see two spills for the local values and the store to the +; alloca? +; CHECK: sub sp, sp, #48 +; CHECK: stp x30, x0, [sp, #32] +; CHECK: stp x1, xzr, [sp, #8] +; CHECK: bl return_i1 +; CHECK: add sp, sp, #48 +; CHECK: ret +entry: + %metadata1 = alloca i32 addrspace(1)*, i32 2, align 8 + store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1 + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* null, i32 addrspace(1)* %ptr_base, i32 addrspace(1)* %ptr_derived, i32 addrspace(1)* null) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 9) + %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 9, i32 10) + %c = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 11, i32 11) +; + ret i1 %call1 +} + +; Simple test case to check that we emit the ID field correctly +define i1 @test_id() gc "statepoint-example" { +; CHECK-LABEL: test_id +entry: + %safepoint_token = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 237, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +; This test checks that when SP is changed in the function +; (e.g. passing arguments on stack), the stack map entry +; takes this adjustment into account. +declare void @many_arg(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64) + +define i32 @test_spadj(i32 addrspace(1)* %p) gc "statepoint-example" { + ; CHECK-LABEL: test_spadj + ; CHECK: stp x30, x0, [sp, #16] + ; CHECK: mov x0, xzr + ; CHECK: mov x1, xzr + ; CHECK: mov x2, xzr + ; CHECK: mov x3, xzr + ; CHECK: mov x4, xzr + ; CHECK: mov x5, xzr + ; CHECK: mov x6, xzr + ; CHECK: mov x7, xzr + ; CHECK: stp xzr, xzr, [sp] + ; CHECK: bl many_arg + ; CHECK: ldp x30, x8, [sp, #16] + %statepoint_token = call token (i64, i32, void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi64i64i64i64i64i64i64i64i64i64f(i64 0, i32 0, void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)* @many_arg, i32 10, i32 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i32 0, i32 0, i32 addrspace(1)* %p) + %p.relocated = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %statepoint_token, i32 17, i32 17) ; (%p, %p) + %ld = load i32, i32 addrspace(1)* %p.relocated + ret i32 %ld +} + +; Test that function arguments at fixed stack offset +; can be directly encoded in the stack map, without +; spilling. +%struct = type { i64, i64, i64 } + +declare void @use(%struct*) + +define void @test_fixed_arg(%struct* byval %x) gc "statepoint-example" { +; CHECK-LABEL: test_fixed_arg +; CHECK: str x30, [sp, #-16]! +; CHECK: add x0, sp, #16 +; Should not spill fixed stack address. +; CHECK-NOT: str x0, [sp] +; CHECK: bl use +; CHECK: ldr x30, [sp], #16 +; CHECK: ret +entry: + br label %bb + +bb: ; preds = %entry + %statepoint_token = call token (i64, i32, void (%struct*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp0s_structsf(i64 0, i32 0, void (%struct*)* @use, i32 1, i32 0, %struct* %x, i32 0, i32 1, %struct* %x) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidi64i64i64i64i64i64i64i64i64i64f(i64, i32, void (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64)*, i32, i32, ...) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidp0s_structsf(i64, i32, void (%struct*)*, i32, i32, ...) +declare i1 @llvm.experimental.gc.result.i1(token) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3 + +; CHECK-LABEL: .section .llvm_stackmaps +; CHECK-NEXT: __LLVM_StackMaps: +; Header +; CHECK-NEXT: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK-NEXT: .hword 0 +; Num Functions +; CHECK-NEXT: .word 5 +; Num LargeConstants +; CHECK-NEXT: .word 0 +; Num Callsites +; CHECK-NEXT: .word 5 + +; Functions and stack size +; CHECK-NEXT: .xword test +; CHECK-NEXT: .xword 48 +; CHECK-NEXT: .xword 1 +; CHECK-NEXT: .xword test_derived_arg +; CHECK-NEXT: .xword 48 +; CHECK-NEXT: .xword 1 +; CHECK-NEXT: .xword test_id +; CHECK-NEXT: .xword 16 +; CHECK-NEXT: .xword 1 +; CHECK-NEXT: .xword test_spadj +; CHECK-NEXT: .xword 32 +; CHECK-NEXT: .xword 1 +; CHECK-NEXT: .xword test_fixed_arg +; CHECK-NEXT: .xword 16 +; CHECK-NEXT: .xword 1 + +; +; test +; + +; Statepoint ID +; CHECK-NEXT: .xword 0 + +; Callsites +; Constant arguments +; CHECK-NEXT: .word .Ltmp0-test +; CHECK: .hword 0 +; CHECK: .hword 11 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; SmallConstant (2) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 2 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 +; Indirect Spill Slot [SP+8] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 8 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 + +; No Padding or LiveOuts +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .p2align 3 + +; +; test_derived_arg + +; Statepoint ID +; CHECK-NEXT: .xword 0 + +; Callsites +; Constant arguments +; CHECK-NEXT: .word .Ltmp1-test_derived_arg +; CHECK: .hword 0 +; CHECK: .hword 11 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: 
.hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; SmallConstant (2) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 2 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; SmallConstant (0) +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 +; Indirect Spill Slot [SP+8] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 8 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 +; Indirect Spill Slot [SP+40] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 40 + +; No Padding or LiveOuts +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .p2align 3 + +; Records for the test_id function: + +; The Statepoint ID: +; CHECK-NEXT: .xword 237 + +; Instruction Offset +; CHECK-NEXT: .word .Ltmp2-test_id + +; Reserved: +; CHECK: .hword 0 + +; NumLocations: +; CHECK: .hword 3 + +; StkMapRecord[0]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 + +; StkMapRecord[1]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 + +; StkMapRecord[2]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 + +; No padding or LiveOuts +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .p2align 3 + +; +; test_spadj + +; Statepoint ID +; CHECK-NEXT: .xword 0 + +; Instruction Offset +; CHECK-NEXT: .word .Ltmp3-test_spadj + +; Reserved: +; CHECK: .hword 0 + +; NumLocations: +; CHECK: .hword 5 + +; StkMapRecord[0]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 + +; StkMapRecord[1]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 + +; StkMapRecord[2]: +; SmallConstant(0): +; CHECK: .byte 4 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 0 +; CHECK-NEXT: .hword 0 +; CHECK: .word 0 + +; StkMapRecord[3]: +; Indirect Spill Slot [SP+24] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 24 + +; StkMapRecord[4]: +; Indirect Spill Slot [SP+24] +; CHECK: .byte 3 +; CHECK-NEXT: .byte 0 +; CHECK: .hword 8 +; CHECK: .hword 31 +; CHECK-NEXT: .hword 0 +; CHECK: .word 24 + +; No padding or LiveOuts +; CHECK: .hword 0 +; CHECK: .hword 0 +; CHECK: .p2align 3 + +; +; test_fixed_arg + +; Statepoint ID +; CHECK-NEXT: .xword 0 + +; Instruction Offset +; CHECK-NEXT: .word .Ltmp4-test_fixed_arg + +; Reserved: +; 
CHECK: .hword 0
+
+; NumLocations:
+; CHECK: .hword 4
+
+; StkMapRecord[0]:
+; SmallConstant(0):
+; CHECK: .byte 4
+; CHECK-NEXT: .byte 0
+; CHECK: .hword 8
+; CHECK: .hword 0
+; CHECK-NEXT: .hword 0
+; CHECK: .word 0
+
+; StkMapRecord[1]:
+; SmallConstant(0):
+; CHECK: .byte 4
+; CHECK-NEXT: .byte 0
+; CHECK: .hword 8
+; CHECK: .hword 0
+; CHECK-NEXT: .hword 0
+; CHECK: .word 0
+
+; StkMapRecord[2]:
+; SmallConstant(1):
+; CHECK: .byte 4
+; CHECK-NEXT: .byte 0
+; CHECK: .hword 8
+; CHECK: .hword 0
+; CHECK-NEXT: .hword 0
+; CHECK: .word 1
+
+; StkMapRecord[3]:
+; Direct SP+16
+; CHECK: .byte 2
+; CHECK-NEXT: .byte 0
+; CHECK: .hword 8
+; CHECK: .hword 31
+; CHECK-NEXT: .hword 0
+; CHECK: .word 16
+
+; No padding or LiveOuts
+; CHECK: .hword 0
+; CHECK: .hword 0
+; CHECK: .p2align 3
Index: llvm/test/CodeGen/AArch64/statepoint-uniqueing.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/statepoint-uniqueing.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @use(...)
+declare void @f()
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32) #3
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32) #3
+
+;; Two gc.relocates of the same input should require only a single spill/fill
+define void @test_gcrelocate_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test_gcrelocate_uniqueing:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: stp x30, x0, [sp, #16]
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl f
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: ldr x0, [sp, #24]
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: bl use
+; CHECK-NEXT: ldr x30, [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+  %tok = tail call token (i64, i32, void ()*, i32, i32, ...)
+  @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 undef, i32 addrspace(1)* %ptr, i32 addrspace(1)* %ptr)
+  %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 9, i32 9)
+  %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 10, i32 10)
+  call void (...) @use(i32 addrspace(1)* %a, i32 addrspace(1)* %b)
+  ret void
+}
+
+;; Two gc.relocates of a bitcasted pointer should only require a single spill/fill
+define void @test_gcptr_uniqueing(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test_gcptr_uniqueing:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: stp x30, x0, [sp, #16]
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl f
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: ldr x0, [sp, #24]
+; CHECK-NEXT: mov x1, x0
+; CHECK-NEXT: bl use
+; CHECK-NEXT: ldr x30, [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+  %ptr2 = bitcast i32 addrspace(1)* %ptr to i8 addrspace(1)*
+  %tok = tail call token (i64, i32, void ()*, i32, i32, ...)
+  @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 undef, i32 addrspace(1)* %ptr, i8 addrspace(1)* %ptr2)
+  %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 9, i32 9)
+  %b = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %tok, i32 10, i32 10)
+  call void (...) @use(i32 addrspace(1)* %a, i8 addrspace(1)* %b)
+  ret void
+}
+
+;; A GC value is not dead, and does need to be spilled (but not filled) if
+;; that same value is also in the deopt list.
+define void @test_deopt_use(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test_deopt_use:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: stp x30, x0, [sp, #16]
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl f
+; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT: ldr x30, [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+  tail call token (i64, i32, void ()*, i32, i32, ...)
+  @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 undef, i32 addrspace(1)* %ptr)
+  ret void
+}
+
+;; A GC value which is truly unused does not need to be spilled or filled.
+define void @test_dse(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test_dse:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl f
+; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+  tail call token (i64, i32, void ()*, i32, i32, ...)
+  @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @f, i32 0, i32 0, i32 0, i32 2, i32 undef, i32 undef, i32 addrspace(1)* %ptr)
+  ret void
+}
Index: llvm/test/CodeGen/AArch64/statepoint-vector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/statepoint-vector.ll
@@ -0,0 +1,187 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -debug-only=stackmaps < %s | FileCheck %s
+; REQUIRES: asserts
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Can we lower a single vector?
+define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
+; CHECK-LABEL: test:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16]
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str q0, [sp]
+; CHECK-NEXT: bl do_safepoint
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: ldr x30, [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
+  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+  ret <2 x i8 addrspace(1)*> %obj.relocated
+}
+
+; Can we lower the base, derived pairs if both are vectors?
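+; (A minimal sketch of the relocation indexing used in these tests, following
+; the documented gc.relocate semantics: the two trailing i32 operands are
+; indices into the statepoint's operand list, naming the base and the derived
+; pointer respectively. For example,
+;   %d.rel = call coldcc <2 x i8 addrspace(1)*>
+;            @llvm.experimental.gc.relocate.v2p1i8(token %tok, i32 7, i32 8)
+; relocates the derived vector at operand 8 against the base at operand 7;
+; %d.rel and %tok are illustrative names only.)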
+define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" { +; CHECK-LABEL: test2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: dup v1.2d, x0 +; CHECK-NEXT: add v1.2d, v0.2d, v1.2d +; CHECK-NEXT: stp q1, q0, [sp] +; CHECK-NEXT: bl do_safepoint +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: ldr x30, [sp, #32] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived) + %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived) + ret <2 x i8 addrspace(1)*> %derived.relocated +} + +; Variant of @test2 above +define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" { +; CHECK-LABEL: test3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: str x30, [sp, #16] +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bl do_safepoint +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: ldr q0, [sp] +; CHECK-NEXT: ldr x30, [sp, #16] +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret +entry: + br i1 %cnd, label %taken, label %untaken + +taken: ; preds = %entry + %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr + br label %merge + +untaken: ; preds = %entry + %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr + br label %merge + +merge: ; preds = %untaken, %taken + %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] + %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ] + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base) + %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj) + %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*> + %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base) + %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*> + ret <2 x i64 addrspace(1)*> %obj.relocated.casted +} + +; Can we handle vector constants? At the moment, we don't appear to actually +; get selection dag nodes for these. 
+define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
+; CHECK-LABEL: test4:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: stp xzr, x30, [sp, #8]
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: str xzr, [sp]
+; CHECK-NEXT: bl do_safepoint
+; CHECK-NEXT: .Ltmp3:
+; CHECK-NEXT: ldr q0, [sp]
+; CHECK-NEXT: ldr x30, [sp, #16]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
+  %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+  ret <2 x i8 addrspace(1)*> %obj.relocated
+}
+
+; Check that we can lower a constant typed as i128 correctly. Note that the
+; actual value is representable in 64 bits. We don't have a representation
+; for constants wider than 64 bits in the StackMap format.
+define void @test5() gc "statepoint-example" {
+; CHECK-LABEL: test5:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl do_safepoint
+; CHECK-NEXT: .Ltmp4:
+; CHECK-NEXT: ldr x30, [sp], #16
+; CHECK-NEXT: ret
+entry:
+  %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
+  ret void
+}
+
+; CHECK: __LLVM_StackMaps:
+
+; CHECK: .Ltmp0-test
+; Check for the two spill slots
+; Stack Maps: Loc 3: Indirect 31+0 [encoding: .byte 3, .byte 0, .hword 16, .hword 31, .hword 0, .word 0]
+; Stack Maps: Loc 4: Indirect 31+0 [encoding: .byte 3, .byte 0, .hword 16, .hword 31, .hword 0, .word 0]
+; CHECK: .byte 3
+; CHECK: .byte 0
+; CHECK: .hword 16
+; CHECK: .hword 31
+; CHECK: .hword 0
+; CHECK: .word 0
+; CHECK: .byte 3
+; CHECK: .byte 0
+; CHECK: .hword 16
+; CHECK: .hword 31
+; CHECK: .hword 0
+; CHECK: .word 0
+
+; CHECK: .Ltmp1-test2
+; Check for the two spill slots
+; Stack Maps: Loc 3: Indirect 31+16 [encoding: .byte 3, .byte 0, .hword 16, .hword 31, .hword 0, .word 16]
+; Stack Maps: Loc 4: Indirect 31+0 [encoding: .byte 3, .byte 0, .hword 16, .hword 31, .hword 0, .word 0]
+; CHECK: .byte 3
+; CHECK: .byte 0
+; CHECK: .hword 16
+; CHECK: .hword 31
+; CHECK: .hword 0
+; CHECK: .word 16
+; CHECK: .byte 3
+; CHECK: .byte 0
+; CHECK: .hword 16
+; CHECK: .hword 31
+; CHECK: .hword 0
+; CHECK: .word 0
+
+; CHECK: .Ltmp2-test3
+; Check for the two spill slots
+; Stack Maps: Loc 3: Indirect 31+0 [encoding: .byte 3, .byte 0, .hword 16, .hword 31, .hword 0, .word 0]
+; Stack Maps: Loc 4: Indirect 31+0 [encoding: .byte 3, .byte 0, .hword 16, .hword 31, .hword 0, .word 0]
+; CHECK: .byte 3
+; CHECK: .byte 0
+; CHECK: .hword 16
+; CHECK: .hword 31
+; CHECK: .hword 0
+; CHECK: .word 0
+; CHECK: .byte 3
+; CHECK: .byte 0
+; CHECK: .hword 16
+; CHECK: .hword 31
+; CHECK: .hword 0
+; CHECK: .word 0
+
+declare void @do_safepoint()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
+declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
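+; (For reference, a sketch of how to read the location records checked above,
+; following the StackMap version 3 layout in llvm/docs/StackMaps.rst:
+;   .byte  Type      ; 0x1 Register, 0x2 Direct, 0x3 Indirect, 0x4 Constant
+;   .byte  0         ; reserved
+;   .hword Size      ; location size in bytes (16 for a <2 x p1> vector)
+;   .hword DwarfReg  ; DWARF register number; 31 is SP on AArch64
+;   .hword 0         ; reserved
+;   .word  Offset    ; offset from the register, or the constant itself
+; so ".byte 3 ... .hword 31 ... .word 16" reads as "Indirect [SP+16]".)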