Index: llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h +++ llvm/trunk/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -18,6 +18,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -73,6 +74,16 @@ /// cross-basic-block values. DenseMap ValueMap; + // Keep track of frame indices allocated for statepoints as they could be used + // across basic block boundaries. + // Key of the map is statepoint instruction, value is a map from spilled + // llvm Value to the optional stack slot index. + // If optional is unspecified it means that we have visited this value + // but didn't spill it. + typedef DenseMap> StatepointSpilledValueMapTy; + DenseMap + StatepointRelocatedValues; + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in /// the entry block. This allows the allocas to be efficiently referenced /// anywhere in the function. Index: llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -579,6 +579,7 @@ ByValArgFrameIndexMap.clear(); RegFixups.clear(); StatepointStackSlots.clear(); + StatepointRelocatedValues.clear(); PreferredExtendType.clear(); } Index: llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.h =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -56,25 +56,6 @@ Locations[val] = Location; } - /// Returns the relocated value for a given input pointer. 
Will - /// return SDValue() if this value hasn't yet been reloaded from - /// it's stack slot after the statepoint. Otherwise, the value - /// has already been reloaded and the SDValue of that reload will - /// be returned. Note that VMState values are spilled but not - /// reloaded (since they don't change at the safepoint unless - /// also listed in the GC pointer section) and will thus never - /// be in this map - SDValue getRelocLocation(SDValue val) { - if (!RelocLocations.count(val)) - return SDValue(); - return RelocLocations[val]; - } - void setRelocLocation(SDValue val, SDValue Location) { - assert(!RelocLocations.count(val) && - "Trying to allocate already allocated location"); - RelocLocations[val] = Location; - } - /// Record the fact that we expect to encounter a given gc_relocate /// before the next statepoint. If we don't see it, we'll report /// an assertion. @@ -117,8 +98,6 @@ /// Maps pre-relocation value (gc pointer directly incoming into statepoint) /// into it's location (currently only stack slots) DenseMap Locations; - /// Map pre-relocated value into it's new relocated location - DenseMap RelocLocations; /// A boolean indicator for each slot listed in the FunctionInfo as to /// whether it has been used in the current statepoint. 
Since we try to Index: llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -51,7 +51,6 @@ assert(PendingGCRelocateCalls.empty() && "Trying to visit statepoint before finished processing previous one"); Locations.clear(); - RelocLocations.clear(); NextSlotToAllocate = 0; // Need to resize this on each safepoint - we need the two to stay in // sync and the clear patterns of a SelectionDAGBuilder have no relation @@ -61,9 +60,9 @@ AllocatedStackSlots[i] = false; } } + void StatepointLoweringState::clear() { Locations.clear(); - RelocLocations.clear(); AllocatedStackSlots.clear(); assert(PendingGCRelocateCalls.empty() && "cleared before statepoint sequence completed"); @@ -527,6 +526,41 @@ Incoming.getValueType())); } } + + // Record computed locations for all lowered values. + // This cannot be embedded in lowering loops as we need to record *all* + // values, while previous loops account only for values with unique SDValues. + const Instruction *StatepointInstr = + StatepointSite.getCallSite().getInstruction(); + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr]; + + for (GCRelocateOperands RelocateOpers : + StatepointSite.getRelocates(StatepointSite)) { + const Value *V = RelocateOpers.getDerivedPtr(); + SDValue SDV = Builder.getValue(V); + SDValue Loc = Builder.StatepointLowering.getLocation(SDV); + + if (Loc.getNode()) { + SpillMap[V] = cast(Loc)->getIndex(); + } else { + // Record value as visited, but not spilled. This is the case for allocas + // and constants. For these values we can avoid emitting spill load while + // visiting corresponding gc_relocate. + // Actually we do not need to record them in this map at all. + // We do this only to check that we are not relocating any unvisited value. 
+ SpillMap[V] = None; + + // Default llvm mechanisms for exporting values which are used in + // different basic blocks do not work for gc relocates. + // Note that it would be incorrect to teach llvm that all relocates are + // uses of the corresponding values so that it would automatically + // export them. Relocates of the spilled values do not use the original + // value. + if (StatepointSite.getCallSite().isInvoke()) + Builder.ExportFromCurrentBlock(V); + } + } } void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { @@ -550,11 +584,14 @@ ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check - for (const User *U : CS->users()) { - const CallInst *Call = cast(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); + // Consistency check. Don't do this for invokes. It would be too + // expensive to preserve this information across different basic blocks. + if (!CS.isInvoke()) { + for (const User *U : CS->users()) { + const CallInst *Call = cast(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } } #endif @@ -756,42 +793,50 @@ } void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { + GCRelocateOperands RelocateOpers(&CI); + #ifndef NDEBUG // Consistency check - StatepointLowering.relocCallVisited(CI); + // We skip this check for invoke statepoints. It would be too expensive to + // preserve validation info through different basic blocks. + if (!RelocateOpers.isTiedToInvoke()) { + StatepointLowering.relocCallVisited(CI); + } #endif - GCRelocateOperands relocateOpers(&CI); - SDValue SD = getValue(relocateOpers.getDerivedPtr()); + const Value *DerivedPtr = RelocateOpers.getDerivedPtr(); + SDValue SD = getValue(DerivedPtr); + + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + FuncInfo.StatepointRelocatedValues[RelocateOpers.getStatepoint()]; - if (isa(SD) || isa(SD)) { - // We didn't need to spill these special cases (constants and allocas). 
- // See the handling in spillIncomingValueForStatepoint for detail. + // We should have recorded the location for this pointer + assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); + Optional DerivedPtrLocation = SpillMap[DerivedPtr]; + + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail. + if (!DerivedPtrLocation) { setValue(&CI, SD); return; } - SDValue Loc = StatepointLowering.getRelocLocation(SD); - // Emit new load if we did not emit it before - if (!Loc.getNode()) { - SDValue SpillSlot = StatepointLowering.getLocation(SD); - int FI = cast(SpillSlot)->getIndex(); - - // Be conservative: flush all pending loads - // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities - SDValue Chain = getRoot(); - - Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(FI), false, false, - false, 0); + SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, + SD.getValueType()); - StatepointLowering.setRelocLocation(SD, Loc); + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this, + // it may allow more scheduling opportunities + SDValue Chain = getRoot(); + + SDValue SpillLoad = + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(*DerivedPtrLocation), + false, false, false, 0); - // Again, be conservative, don't emit pending loads - DAG.setRoot(Loc.getValue(1)); - } + // Again, be conservative, don't emit pending loads + DAG.setRoot(SpillLoad.getValue(1)); - assert(Loc.getNode()); - setValue(&CI, Loc); + assert(SpillLoad.getNode()); + setValue(&CI, SpillLoad); } Index: llvm/trunk/test/CodeGen/X86/statepoint-invoke.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/statepoint-invoke.ll +++ 
llvm/trunk/test/CodeGen/X86/statepoint-invoke.ll @@ -2,11 +2,49 @@ target triple = "x86_64-pc-linux-gnu" +declare void @"some_call"(i64 addrspace(1)*) declare i64 addrspace(1)* @"some_other_call"(i64 addrspace(1)*) declare i32 @"personality_function"() -define i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, +define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj, + i64 addrspace(1)* %obj1) +gc "statepoint-example" { +entry: + ; CHECK: Ltmp{{[0-9]+}}: + ; CHECK: callq some_call + ; CHECK: Ltmp{{[0-9]+}}: + %0 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) + to label %invoke_safepoint_normal_dest unwind label %exceptional_return + +invoke_safepoint_normal_dest: + ; CHECK: movq + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 13, i32 13) + %obj1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %0, i32 14, i32 14) + br label %normal_return + +normal_return: + ; CHECK: retq + ret i64 addrspace(1)* %obj.relocated + +exceptional_return: + ; CHECK: Ltmp{{[0-9]+}}: + ; CHECK: movq + ; CHECK: retq + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %obj.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) + %obj1.relocated1 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14) + ret i64 addrspace(1)* %obj1.relocated1 +} +; CHECK-LABEL: GCC_except_table{{[0-9]+}}: +; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} +; CHECK: .long .Ltmp{{[0-9]+}}-.Lfunc_begin{{[0-9]+}} +; CHECK: .byte 0 +; CHECK: .align 4 + +define 
i64 addrspace(1)* @test_result(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" { entry: @@ -24,11 +62,12 @@ exceptional_return: ; CHECK: .Ltmp{{[0-9]+}}: - ; CHECK: popq - ; CHECK: retq + ; CHECK: movq %landing_pad = landingpad { i8*, i32 } personality i32 ()* @personality_function cleanup - ret i64 addrspace(1)* %obj + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %obj.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) + ret i64 addrspace(1)* %obj.relocated } ; CHECK-LABEL: GCC_except_table{{[0-9]+}}: ; CHECK: .long .Ltmp{{[0-9]+}}-.Ltmp{{[0-9]+}} @@ -36,5 +75,124 @@ ; CHECK: .byte 0 ; CHECK: .align 4 +define i64 addrspace(1)* @test_same_val(i1 %cond, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) + gc "statepoint-example" { +entry: + br i1 %cond, label %left, label %right + +left: + ; CHECK-LABEL: %left + ; CHECK: movq %rdx, 8(%rsp) + ; CHECK: movq + ; CHECK: callq some_call + %sp1 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val1, i64 addrspace(1)* %val2) + to label %left.relocs unwind label %exceptional_return.left + +left.relocs: + ; CHECK: movq (%rsp), + ; CHECK: movq 8(%rsp), [[REGVAL2:%[a-z]+]] + %val1.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 13, i32 13) + %val2.relocated_left = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 14, i32 14) + br label %normal_return + +right: + ; CHECK-LABEL: %right + ; CHECK: movq + ; CHECK: movq %rdx, (%rsp) + ; CHECK: callq some_call + %sp2 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* %val2, i64 addrspace(1)* %val3) + to label %right.relocs unwind label %exceptional_return.right + +right.relocs: + ; CHECK: movq (%rsp), [[REGVAL2]] + ; CHECK: movq + %val2.relocated_right = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 13, i32 13) + %val3.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp2, i32 14, i32 14) + br label %normal_return + +normal_return: + ; CHECK-LABEL: %normal_return + ; CHECK: cmoveq {{.*}}[[REGVAL2]]{{.*}} + ; CHECK: retq + %a1 = phi i64 addrspace(1)* [%val1.relocated, %left.relocs], [%val3.relocated, %right.relocs] + %a2 = phi i64 addrspace(1)* [%val2.relocated_left, %left.relocs], [%val2.relocated_right, %right.relocs] + %ret = select i1 %cond, i64 addrspace(1)* %a1, i64 addrspace(1)* %a2 + ret i64 addrspace(1)* %ret + +exceptional_return.left: + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %val.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) + ret i64 addrspace(1)* %val.relocated2 + +exceptional_return.right: + %landing_pad1 = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token1 = extractvalue { i8*, i32 } %landing_pad1, 1 + %val.relocated3 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token1, i32 13, i32 13) + ret i64 addrspace(1)* %val.relocated3 +} + +define i64 addrspace(1)* @test_null_undef(i64 addrspace(1)* %val1) + gc "statepoint-example" { +; CHECK-LABEL: test_null_undef: +entry: + ; CHECK: callq some_call + %sp1 = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i64 addrspace(1)* null, i64 addrspace(1)* undef) + to label %normal_return unwind label %exceptional_return + +normal_return: + ; CHECK-LABEL: %normal_return + ; CHECK: xorl %eax, %eax + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %null.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 13, i32 13) + %undef.relocated = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %sp1, i32 14, i32 14) + ret i64 addrspace(1)* %null.relocated + +exceptional_return: + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %null.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 13, i32 13) + %undef.relocated2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14) + ret i64 addrspace(1)* %null.relocated2 +} + +define i64 addrspace(1)* @test_alloca_and_const(i64 addrspace(1)* %val1) + gc "statepoint-example" { +; CHECK-LABEL: test_alloca_and_const: +entry: + %a = alloca i32 + %aa = addrspacecast i32* %a to i32 addrspace(1)* + %c = inttoptr i64 15 to i64 addrspace(1)* + ; CHECK: callq + %sp = invoke i32 (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @some_call, i32 1, i32 0, i64 addrspace(1)* %val1, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 addrspace(1)* %aa, i64 addrspace(1)* %c) + to label %normal_return unwind label %exceptional_return + +normal_return: + ; CHECK: leaq + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %aa.rel = call coldcc i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %sp, i32 13, i32 13) + %aa.converted = bitcast i32 addrspace(1)* %aa.rel to i64 addrspace(1)* + ret i64 addrspace(1)* %aa.converted + +exceptional_return: + ; CHECK: movl $15 + ; CHECK-NEXT: popq + ; CHECK-NEXT: retq + %landing_pad = landingpad { i8*, i32 } personality i32 ()* @"personality_function" + cleanup + %relocate_token = extractvalue { i8*, i32 } %landing_pad, 1 + %aa.rel2 = call coldcc i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32 %relocate_token, i32 14, i32 14) + ret i64 addrspace(1)* %aa.rel2 +} + +declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) declare i32 @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) -declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32) \ No newline at end of file + +declare i64 addrspace(1)* @llvm.experimental.gc.relocate.p1i64(i32, i32, i32) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) +declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(i32)