Index: include/llvm/CodeGen/FunctionLoweringInfo.h =================================================================== --- include/llvm/CodeGen/FunctionLoweringInfo.h +++ include/llvm/CodeGen/FunctionLoweringInfo.h @@ -73,6 +73,14 @@ /// cross-basic-block values. DenseMap ValueMap; + // Keep track of frame indices allocated for invoke statepoint values + // used across basic block boundaries + // If first value of the pair is true then second will contain frame index. + // If first value of the pair is false it indicates that Value was visited + // but not spilled. It can happen for constant and allocas. We are using it + // for consistency checks. + DenseMap> InvokeStatepointValueSlots; + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in /// the entry block. This allows the allocas to be efficiently referenced /// anywhere in the function. Index: lib/CodeGen/SelectionDAG/StatepointLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -54,6 +54,7 @@ AllocatedStackSlots[i] = false; } } + void StatepointLoweringState::clear() { Locations.clear(); RelocLocations.clear(); @@ -543,6 +544,42 @@ } } + + // For invoke statepoint we need to export locations of all relocated values. + if (StatepointSite.getCallSite().isInvoke()) { + for (GCRelocateOperands relocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + const Value *V = relocateOpers.derivedPtr(); + SDValue sdV = Builder.getValue(V); + SDValue loc = Builder.StatepointLowering.getLocation(sdV); + + if (loc.getNode()) { + // There might be already assigned slot for this value. This could happen + // only if statepoints which are using this value does not dominate + // each other. Otherwise if several statepoints dominate each other and + // use same value, one of them should use unrelocated value, which is + // incorrect. + // It means that we have case which looks like this: + // Value + // / | \ + // sp1 sp2 sp3 + // And since we are visiting basic blocks in reverse post order manner + // we will always finish processing one subtree before entering another + // which means that we are allowed to discard previously recorded + // locations for the same value. + Builder.FuncInfo.InvokeStatepointValueSlots[V] = + std::make_pair(true, cast(loc)->getIndex()); + } + else { + Builder.FuncInfo.InvokeStatepointValueSlots[V] = + std::make_pair(false, 0); + + // Not spilled values should be exported. Otherwise we end up using + // CopyFromReg from undefined register in visitGcRelocate. + Builder.ExportFromCurrentBlock(V); + } + } + } } void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { @@ -567,11 +604,14 @@ ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check - for (const User *U : CS->users()) { - const CallInst *Call = cast(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); + // Consistency check. Don't do this for invokes. It would be too + // expensive to preserve this information across different basic blocks + if (!CS.isInvoke()) { + for (const User *U : CS->users()) { + const CallInst *Call = cast(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } } #endif @@ -663,7 +703,7 @@ DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root // Remove originall call node DAG.DeleteNode(CallNode); - + // DON'T set the root - under the assumption that it's already set past the // inserted node we created. @@ -702,13 +742,19 @@ } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { + GCRelocateOperands relocateOpers(&CI); + #ifndef NDEBUG // Consistency check - StatepointLowering.relocCallVisited(CI); + // We skip this check for invoke statepoints. It would be too expensive to + // preserve validation info through different basic blocks. + if (!relocateOpers.isTiedToInvoke()) { + StatepointLowering.relocCallVisited(CI); + } #endif - GCRelocateOperands relocateOpers(&CI); - SDValue SD = getValue(relocateOpers.derivedPtr()); + const Value *DerivedPtr = relocateOpers.derivedPtr(); + SDValue SD = getValue(DerivedPtr); if (isa(SD) || isa(SD)) { // We didn't need to spill these special cases (constants and allocas). @@ -717,11 +763,34 @@ return; } + if ((relocateOpers.isTiedToInvoke() && + !FuncInfo.InvokeStatepointValueSlots[DerivedPtr].first)) { + // In this case we have visited invoke statepoint value + // but didn't spill it. This is somewhat similar to the previous case + // but we can not use isa<...> checks because SD will always be CopyFromReg + // node. + setValue(&CI, SD); + return; + } + SDValue Loc = StatepointLowering.getRelocLocation(SD); // Emit new load if we did not emit it before if (!Loc.getNode()) { - SDValue SpillSlot = StatepointLowering.getLocation(SD); - int FI = cast(SpillSlot)->getIndex(); + int FI = INT32_MAX; + SDValue SpillSlot; + + if (relocateOpers.isTiedToInvoke()) { + assert(FuncInfo.InvokeStatepointValueSlots.count(DerivedPtr)); + assert(FuncInfo.InvokeStatepointValueSlots[DerivedPtr].first); + + FI = FuncInfo.InvokeStatepointValueSlots[DerivedPtr].second; + SpillSlot = DAG.getTargetFrameIndex(FI, SD.getValueType()); + } + else { + SpillSlot = StatepointLowering.getLocation(SD); + FI = cast(SpillSlot)->getIndex(); + } + assert(SpillSlot.getNode() && FI != INT32_MAX); // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, Index: test/CodeGen/X86/statepoint-invoke.ll =================================================================== --- test/CodeGen/X86/statepoint-invoke.ll +++ test/CodeGen/X86/statepoint-invoke.ll @@ -2,11 +2,49 @@ target triple = "x86_64-pc-linux-gnu" +declare void @"some_call"(i64 addrspace(1)*) declare i64 addrspace(1)* @"some_other_call"(i64 addrspace(1)*) declare i32 @"personality_function"() -define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, +define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj, + i64 addrspace(1)* %obj1) +gc "statepoint-example" { +entry: + ; CHECK: Ltmp{{[0-9]+}}: + ; CHECK: callq some_call + ; CHECK: Ltmp{{[0-9]+}}: + %0 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) + to label %invoke_safepoint_normal_dest unwind label %exceptional_return + +invoke_safepoint_normal_dest: + ; CHECK: movq + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 10, i32 10) + %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 11, i32 11) + br label %normal_return + +normal_return: + ; CHECK: retq + ret i64 addrspace(1)* %obj.relocated + +exceptional_return: + ; CHECK: Ltmp{{[0-9]+}}: + ; CHECK: movq + ; CHECK: retq + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 11, i32 11) + ret i64 addrspace(1)* %obj1.relocated1 +} +; CHECK-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK: .long .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK: .byte 0 +; CHECK: .align 4 + +define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" { entry: @@ -24,11 +62,12 @@ exceptional_return: ; CHECK: .Ltmp{{[0-9]+}}: - ; CHECK: popq - ; CHECK: retq + ; CHECK: movq %landing_pad = landingpad { i8*, i32 } personality i32 ()* @personality_function cleanup - ret i64 addrspace(1)* %obj + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + ret i64 addrspace(1)* %obj.relocated } ; CHECK-LABEL: GCC_except_table{{[0-9]+}}: ; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} @@ -36,5 +75,124 @@ ; CHECK: .byte 0 ; CHECK: .align 4 +define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) + gc "statepoint-example" { +entry: + br i1 %cond, label %left, label %right + +left: + ; CHECK-LABEL: %left + ; CHECK: movq %rdx, 8(%rsp) + ; CHECK: movq + ; CHECK: callq some_call + %sp1 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2) + to label %left.relocs unwind label %exceptional_return.left + +left.relocs: + ; CHECK: movq (%rsp), + ; CHECK: movq 8(%rsp), [[REGVAL2:%[a-z]+]] + %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 10, i32 10) + %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 11, i32 11) + br label %normal_return + +right: + ; CHECK-LABEL: %right + ; CHECK: movq + ; CHECK: movq %rdx, (%rsp) + ; CHECK: callq some_call + %sp2 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) + to label %right.relocs unwind label %exceptional_return.right + +right.relocs: + ; CHECK: movq (%rsp), [[REGVAL2]] + ; CHECK: movq + %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 10, i32 10) + %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 11, i32 11) + br label %normal_return + +normal_return: + ; CHECK-LABEL: %normal_return + ; CHECK: cmoveq {{.*}}[[REGVAL2]]{{.*}} + ; CHECK retq + %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs] + %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs] + %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2 + ret i64 addrspace(1)* %ret + +exceptional_return.left: + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + ret i64 addrspace(1)* %val.relocated2 + +exceptional_return.right: + %landing_pad1 = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token1 = extractvalue { i8*, i32 } %landing_pad1, 1 + %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token1, i32 10, i32 10) + ret i64 addrspace(1)* %val.relocated3 +} + +define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1) + gc "statepoint-example" { +; CHECK-LABEL: test_null_undef: +entry: + ; CHECK: callq some_call + %sp1 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef) + to label %normal_return unwind label %exceptional_return + +normal_return: + ; CHECK-LABEL: %normal_return + ; CHECK: xorl %eax, %eax + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 10, i32 10) + %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 11, i32 11) + ret i64 addrspace(1)* %null.relocated + +exceptional_return: + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + %undef.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 11, i32 11) + ret i64 addrspace(1)* %null.relocated2 +} + +define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1) + gc "statepoint-example" { +; CHECK-LABEL: test_alloca_and_const: +entry: + %a = alloca i32 + %aa = addrspacecast i32* %a to i32 addrspace(1)* + %c = inttoptr i64 15 to i64 addrspace(1)* + ; CHECK: callq + %sp = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %aa, i64 addrspace(1)* %c) + to label %normal_return unwind label %exceptional_return + +normal_return: + ; CHECK: leaq + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %sp, i32 10, i32 10) + %aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)* + ret i64 addrspace(1)* %aa.converted + +exceptional_return: + ; CHECK: movl $15 + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 11, i32 11) + ret i64 addrspace(1)* %aa.rel2 +} + +declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)*, i32, i32, ...) declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) + +declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32, i32, i32) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32)