Index: include/llvm/CodeGen/FunctionLoweringInfo.h =================================================================== --- include/llvm/CodeGen/FunctionLoweringInfo.h +++ include/llvm/CodeGen/FunctionLoweringInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -73,6 +74,16 @@ /// cross-basic-block values. DenseMap ValueMap; + // Keep track of frame indices allocated for invoke statepoint values + // used across basic block boundaries. + // First value of the key is statepoint, second is value for which location + // is stored. + // If optional value is unspecified it means that we have visited the value + // but didn't spill it. + typedef DenseMap, Optional> + StatepointRelocatedValuesMap; + StatepointRelocatedValuesMap StatepointRelocatedValues; + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in /// the entry block. This allows the allocas to be efficiently referenced /// anywhere in the function. Index: lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp =================================================================== --- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -526,6 +526,7 @@ ByValArgFrameIndexMap.clear(); RegFixups.clear(); StatepointStackSlots.clear(); + StatepointRelocatedValues.clear(); PreferredExtendType.clear(); } Index: lib/CodeGen/SelectionDAG/StatepointLowering.h =================================================================== --- lib/CodeGen/SelectionDAG/StatepointLowering.h +++ lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -56,25 +56,6 @@ Locations[val] = Location; } - /// Returns the relocated value for a given input pointer. Will - /// return SDValue() if this value hasn't yet been reloaded from - /// it's stack slot after the statepoint. 
Otherwise, the value - /// has already been reloaded and the SDValue of that reload will - /// be returned. Note that VMState values are spilled but not - /// reloaded (since they don't change at the safepoint unless - /// also listed in the GC pointer section) and will thus never - /// be in this map - SDValue getRelocLocation(SDValue val) { - if (!RelocLocations.count(val)) - return SDValue(); - return RelocLocations[val]; - } - void setRelocLocation(SDValue val, SDValue Location) { - assert(!RelocLocations.count(val) && - "Trying to allocate already allocated location"); - RelocLocations[val] = Location; - } - /// Record the fact that we expect to encounter a given gc_relocate /// before the next statepoint. If we don't see it, we'll report /// an assertion. @@ -117,8 +98,6 @@ /// Maps pre-relocation value (gc pointer directly incoming into statepoint) /// into it's location (currently only stack slots) DenseMap Locations; - /// Map pre-relocated value into it's new relocated location - DenseMap RelocLocations; /// A boolean indicator for each slot listed in the FunctionInfo as to /// whether it has been used in the current statepoint. 
Since we try to Index: lib/CodeGen/SelectionDAG/StatepointLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -43,7 +43,6 @@ assert(PendingGCRelocateCalls.empty() && "Trying to visit statepoint before finished processing previous one"); Locations.clear(); - RelocLocations.clear(); NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in // sync and the clear patterns of a SelectionDAGBuilder have no relation @@ -53,9 +52,9 @@ AllocatedStackSlots[i] = false; } } + void StatepointLoweringState::clear() { Locations.clear(); - RelocLocations.clear(); AllocatedStackSlots.clear(); assert(PendingGCRelocateCalls.empty() && "cleared before statepoint sequence completed"); @@ -536,6 +535,39 @@ Incoming.getValueType())); } } + + // Record computed locations for all lowered values. + // This can not be embedded in lowering loops as we need to record *all* + // values, while previous loops account only values with unique SDValues. + for (GCRelocateOperands relocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + const Value *V = relocateOpers.derivedPtr(); + SDValue SdV = Builder.getValue(V); + SDValue Loc = Builder.StatepointLowering.getLocation(SdV); + auto Key = std::make_pair(StatepointSite.getCallSite().getInstruction(), V); + + if (Loc.getNode()) { + Builder.FuncInfo.StatepointRelocatedValues[Key] = + cast(Loc)->getIndex(); + } + else { + // Record value as visited, but not spilled. This is the case for allocas + // and constants. For these values we can avoid emitting a spill load while + // visiting corresponding gc_relocate. + // Actually we may not record them in this map at all. We do this only + // to check that we are not relocating any unvisited value. 
+ Builder.FuncInfo.StatepointRelocatedValues[Key] = None; + + // Default llvm mechanisms for exporting values which are used in + // different basic blocks do not work for gc relocates. + // Note that it would be incorrect to teach llvm that all relocates are + // uses of the corresponding values so that it would automatically + // export them. Relocates of the spilled values do not use the original + // value. + if (StatepointSite.getCallSite().isInvoke()) + Builder.ExportFromCurrentBlock(V); + } + } } void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { @@ -559,11 +591,14 @@ ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check - for (const User *U : CS->users()) { - const CallInst *Call = cast(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); + // Consistency check. Don't do this for invokes. It would be too + // expensive to preserve this information across different basic blocks + if (!CS.isInvoke()) { + for (const User *U : CS->users()) { + const CallInst *Call = cast(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } } #endif @@ -692,42 +727,51 @@ } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { + GCRelocateOperands relocateOpers(&CI); + #ifndef NDEBUG // Consistency check - StatepointLowering.relocCallVisited(CI); + // We skip this check for invoke statepoints. It would be too expensive to + // preserve validation info through different basic blocks. 
+ if (!relocateOpers.isTiedToInvoke()) { + StatepointLowering.relocCallVisited(CI); + } #endif - GCRelocateOperands relocateOpers(&CI); - SDValue SD = getValue(relocateOpers.derivedPtr()); + const Value *DerivedPtr = relocateOpers.derivedPtr(); + SDValue SD = getValue(DerivedPtr); + + auto DerivedPtrKey = + std::make_pair(relocateOpers.statepoint(), + DerivedPtr); + // We should have recorded location for this pointer + assert(FuncInfo.StatepointRelocatedValues.count(DerivedPtrKey)); + Optional DerivedPtrLocation = + FuncInfo.StatepointRelocatedValues[DerivedPtrKey]; - if (isa(SD) || isa(SD)) { - // We didn't need to spill these special cases (constants and allocas). - // See the handling in spillIncomingValueForStatepoint for detail. + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail. + if (!DerivedPtrLocation) { setValue(&CI, SD); return; } - SDValue Loc = StatepointLowering.getRelocLocation(SD); - // Emit new load if we did not emit it before - if (!Loc.getNode()) { - SDValue SpillSlot = StatepointLowering.getLocation(SD); - int FI = cast(SpillSlot)->getIndex(); - - // Be conservative: flush all pending loads - // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities - SDValue Chain = getRoot(); + SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, + SD.getValueType()); - Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(FI), false, false, - false, 0); + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this, + // it may allow more scheduling opportunities + SDValue Chain = getRoot(); - StatepointLowering.setRelocLocation(SD, Loc); + SDValue SpillLoad = + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(*DerivedPtrLocation), + false, false, false, 0); - // 
Again, be conservative, don't emit pending loads - DAG.setRoot(Loc.getValue(1)); - } + // Again, be conservative, don't emit pending loads + DAG.setRoot(SpillLoad.getValue(1)); - assert(Loc.getNode()); - setValue(&CI, Loc); + assert(SpillLoad.getNode()); + setValue(&CI, SpillLoad); } Index: test/CodeGen/X86/statepoint-invoke.ll =================================================================== --- test/CodeGen/X86/statepoint-invoke.ll +++ test/CodeGen/X86/statepoint-invoke.ll @@ -2,11 +2,49 @@ target triple = "x86_64-pc-linux-gnu" +declare void @"some_call"(i64 addrspace(1)*) declare i64 addrspace(1)* @"some_other_call"(i64 addrspace(1)*) declare i32 @"personality_function"() -define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, +define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj, + i64 addrspace(1)* %obj1) +gc "statepoint-example" { +entry: + ; CHECK: Ltmp{{[0-9]+}}: + ; CHECK: callq some_call + ; CHECK: Ltmp{{[0-9]+}}: + %0 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) + to label %invoke_safepoint_normal_dest unwind label %exceptional_return + +invoke_safepoint_normal_dest: + ; CHECK: movq + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 10, i32 10) + %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 11, i32 11) + br label %normal_return + +normal_return: + ; CHECK: retq + ret i64 addrspace(1)* %obj.relocated + +exceptional_return: + ; CHECK: Ltmp{{[0-9]+}}: + ; CHECK: movq + ; CHECK: retq + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 11, i32 11) + ret i64 addrspace(1)* %obj1.relocated1 +} +; CHECK-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK: .long .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK: .byte 0 +; CHECK: .align 4 + +define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" { entry: @@ -24,11 +62,12 @@ exceptional_return: ; CHECK: .Ltmp{{[0-9]+}}: - ; CHECK: popq - ; CHECK: retq + ; CHECK: movq %landing_pad = landingpad { i8*, i32 } personality i32 ()* @personality_function cleanup - ret i64 addrspace(1)* %obj + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + ret i64 addrspace(1)* %obj.relocated } ; CHECK-LABEL: 
GCC_except_table{{[0-9]+}}: ; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} @@ -36,5 +75,124 @@ ; CHECK: .byte 0 ; CHECK: .align 4 +define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) + gc "statepoint-example" { +entry: + br i1 %cond, label %left, label %right + +left: + ; CHECK-LABEL: %left + ; CHECK: movq %rdx, 8(%rsp) + ; CHECK: movq + ; CHECK: callq some_call + %sp1 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2) + to label %left.relocs unwind label %exceptional_return.left + +left.relocs: + ; CHECK: movq (%rsp), + ; CHECK: movq 8(%rsp), [[REGVAL2:%[a-z]+]] + %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 10, i32 10) + %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 11, i32 11) + br label %normal_return + +right: + ; CHECK-LABEL: %right + ; CHECK: movq + ; CHECK: movq %rdx, (%rsp) + ; CHECK: callq some_call + %sp2 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) + to label %right.relocs unwind label %exceptional_return.right + +right.relocs: + ; CHECK: movq (%rsp), [[REGVAL2]] + ; CHECK: movq + %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 10, i32 10) + %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 11, i32 11) + br label %normal_return + +normal_return: + ; CHECK-LABEL: %normal_return + ; CHECK: cmoveq {{.*}}[[REGVAL2]]{{.*}} + ; CHECK: retq + %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs] + %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs] + %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2 + ret i64 addrspace(1)* %ret + +exceptional_return.left: + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + ret i64 addrspace(1)* %val.relocated2 + +exceptional_return.right: + %landing_pad1 = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token1 = extractvalue { i8*, i32 } %landing_pad1, 1 + %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token1, i32 10, i32 10) + ret i64 addrspace(1)* %val.relocated3 +} + +define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1) + gc "statepoint-example" { +; CHECK-LABEL: test_null_undef: +entry: + ; CHECK: callq some_call + %sp1 = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef) + to label %normal_return unwind label %exceptional_return + +normal_return: + ; CHECK-LABEL: %normal_return + ; CHECK: xorl %eax, %eax + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 10, i32 10) + %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 11, i32 11) + ret i64 addrspace(1)* %null.relocated + +exceptional_return: + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 10, i32 10) + %undef.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 11, i32 11) + ret i64 addrspace(1)* %null.relocated2 +} + +define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1) + gc "statepoint-example" { +; CHECK-LABEL: test_alloca_and_const: +entry: + %a = alloca i32 + %aa = addrspacecast i32* %a to i32 addrspace(1)* + %c = inttoptr i64 15 to i64 addrspace(1)* + ; CHECK: callq + %sp = invoke i32 (void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %aa, i64 addrspace(1)* %c) + to label %normal_return unwind label %exceptional_return + +normal_return: + ; CHECK: leaq + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %sp, i32 10, i32 10) + %aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)* + ret i64 addrspace(1)* %aa.converted + +exceptional_return: + ; CHECK: movl $15 + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 11, i32 11) + ret i64 addrspace(1)* %aa.rel2 +} + +declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(void (i64 addrspace(1)*)*, i32, i32, ...) declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) + +declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32, i32, i32) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32)