diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -841,7 +841,7 @@ Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, None); - SDValue Chain = DAG.getEntryNode(); + SDValue Chain = DAG.getRoot(); RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); @@ -919,8 +919,9 @@ // Remove original call node DAG.DeleteNode(CallNode); - // DON'T set the root - under the assumption that it's already set past the - // inserted node we created. + // Since we always emit CopyToRegs (even for local relocates), we must + // update root, so that they are emitted before any local uses. + (void)getControlRoot(); // TODO: A better future implementation would be to emit a single variable // argument, variable return value STATEPOINT node here and then hookup the diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -8,8 +8,12 @@ declare void @func() declare void @"some_call"(i64 addrspace(1)*) declare void @consume(i32 addrspace(1)*) +declare i32 @consume1(i32) gc "statepoint-example" declare void @consume2(i32 addrspace(1)*, i32 addrspace(1)*) +declare void @consume3(float) gc "statepoint-example" +declare float @consume4(i64) gc "statepoint-example" declare void @consume5(i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 addrspace(1)*) + declare void @use1(i32 addrspace(1)*, i8 addrspace(1)*) declare i32 @"personality_function"() @@ -590,6 +594,90 @@ ret void } +; test multiple statepoints/relocates within single block. +; relocates must be properly scheduled w.r.t. statepoints +define void @test_sched(float %0, i32 %1, i8 addrspace(1)* %2) gc "statepoint-example" { +; CHECK-LABEL: test_sched: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: subq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: .cfi_offset %rbp, -16 +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movl %edi, %ebp +; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq consume3 +; CHECK-NEXT: .Ltmp25: +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: cvtsi2sd %ebp, %xmm0 +; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp26: +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp27: +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero +; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: movq %rbx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp28: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: ucomisd %xmm0, %xmm1 +; CHECK-NEXT: movabsq $9223372036854775807, %rdi # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: cmovbeq %rax, %rdi +; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp29: +; CHECK-NEXT: addq $40, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %token0 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 0, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %2) ] + %reloc1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token0, i32 0, i32 0) ; (%2, %2) + %tmp1 = sitofp i32 %1 to double + %to_max.i29 = fcmp ogt double %tmp1, 0.000000e+00 + %token1 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "gc-live"(i8 addrspace(1)* %reloc1) ] + %reloc2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1) + %reloc3 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token1, i32 0, i32 0) ; (%reloc1, %reloc1) + %token2 = call token (i64, i32, i32 (i32)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 2, i32 5, i32 (i32)* nonnull @consume1, i32 1, i32 0, i32 undef, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc2, i8 addrspace(1)* %reloc3) ] + %reloc4 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 0, i32 0) ; (%reloc3, %reloc2) + %reloc5 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token2, i32 1, i32 1) ; (%reloc3, %reloc3) + %token3 = call token (i64, i32, void (float)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 2, i32 5, void (float)* nonnull @consume3, i32 1, i32 0, float %0, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"(i8 addrspace(1)* %reloc4, i8 addrspace(1)* %reloc5) ] + %reloc6 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token3, i32 1, i32 0) ; (%reloc5, %reloc4) + %tmp5 = select i1 %to_max.i29, i64 9223372036854775807, i64 0 + %token4 = call token (i64, i32, float (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 2, i32 5, float (i64)* nonnull @consume4, i32 1, i32 0, i64 %tmp5, i32 0, i32 0) [ "deopt"(float %0, double %tmp1), "gc-live"() ] +ret void +} + +declare token @llvm.experimental.gc.statepoint.p0f_f32i64f(i64 immarg, i32 immarg, float (i64)*, i32 immarg, i32 immarg, ...) +declare token @llvm.experimental.gc.statepoint.p0f_i32i32f(i64 immarg, i32 immarg, i32 (i32)*, i32 immarg, i32 immarg, ...) +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf32f(i64 immarg, i32 immarg, void (float)*, i32 immarg, i32 immarg, ...) declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)