diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h --- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -92,35 +92,12 @@ DenseMap CatchPadExceptionPointers; /// Keep track of frame indices allocated for statepoints as they could be - /// used across basic block boundaries. This struct is more complex than a - /// simple map because the stateopint lowering code de-duplicates gc pointers - /// based on their SDValue (so %p and (bitcast %p to T) will get the same - /// slot), and we track that here. - - struct StatepointSpillMap { - using SlotMapTy = DenseMap>; - - /// Maps uniqued llvm IR values to the slots they were spilled in. If a - /// value is mapped to None it means we visited the value but didn't spill - /// it (because it was a constant, for instance). - SlotMapTy SlotMap; - - /// Maps llvm IR values to the values they were de-duplicated to. - DenseMap DuplicateMap; - - SlotMapTy::const_iterator find(const Value *V) const { - auto DuplIt = DuplicateMap.find(V); - if (DuplIt != DuplicateMap.end()) - V = DuplIt->second; - return SlotMap.find(V); - } - - SlotMapTy::const_iterator end() const { return SlotMap.end(); } - }; - - /// Maps gc.statepoint instructions to their corresponding StatepointSpillMap - /// instances. - DenseMap StatepointSpillMaps; + /// used across basic block boundaries (e.g. for an invoke). For each + /// gc.statepoint instruction, maps uniqued llvm IR values to the slots they + /// were spilled in. If a value is mapped to None it means we visited the + /// value but didn't spill it (because it was a constant, for instance). + using StatepointSpillMapTy = DenseMap>; + DenseMap StatepointSpillMaps; /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in /// the entry block. This allows the allocas to be efficiently referenced diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -268,45 +268,6 @@ Builder.StatepointLowering.setLocation(Incoming, Loc); } -/// Remove any duplicate (as SDValues) from the derived pointer pairs. This -/// is not required for correctness. It's purpose is to reduce the size of -/// StackMap section. It has no effect on the number of spill slots required -/// or the actual lowering. -static void -removeDuplicateGCPtrs(SmallVectorImpl &Bases, - SmallVectorImpl &Ptrs, - SmallVectorImpl &Relocs, - SelectionDAGBuilder &Builder, - FunctionLoweringInfo::StatepointSpillMap &SSM) { - DenseMap Seen; - - SmallVector NewBases, NewPtrs; - SmallVector NewRelocs; - for (size_t i = 0, e = Ptrs.size(); i < e; i++) { - SDValue SD = Builder.getValue(Ptrs[i]); - auto SeenIt = Seen.find(SD); - - if (SeenIt == Seen.end()) { - // Only add non-duplicates - NewBases.push_back(Bases[i]); - NewPtrs.push_back(Ptrs[i]); - NewRelocs.push_back(Relocs[i]); - Seen[SD] = Ptrs[i]; - } else { - // Duplicate pointer found, note in SSM and move on: - SSM.DuplicateMap[Ptrs[i]] = SeenIt->second; - } - } - assert(Bases.size() >= NewBases.size()); - assert(Ptrs.size() >= NewPtrs.size()); - assert(Relocs.size() >= NewRelocs.size()); - Bases = NewBases; - Ptrs = NewPtrs; - Relocs = NewRelocs; - assert(Ptrs.size() == Bases.size()); - assert(Ptrs.size() == Relocs.size()); -} - /// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result @@ -610,7 +571,7 @@ SDValue Loc = Builder.StatepointLowering.getLocation(SDV); if (Loc.getNode()) { - SpillMap.SlotMap[V] = cast(Loc)->getIndex(); + SpillMap[V] = cast(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas // and constants. For this values we can avoid emitting spill load while @@ -618,7 +579,7 @@ // Actually we do not need to record them in this map at all. // We do this only to check that we are not relocating any unvisited // value. - SpillMap.SlotMap[V] = None; + SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. @@ -641,24 +602,15 @@ NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); + assert(SI.Bases.size() == SI.Ptrs.size() && + SI.Ptrs.size() == SI.GCRelocates.size()); #ifndef NDEBUG - // We schedule gc relocates before removeDuplicateGCPtrs since we _will_ - // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs. for (auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) StatepointLowering.scheduleRelocCall(*Reloc); #endif - // Remove any redundant llvm::Values which map to the same SDValue as another - // input. Also has the effect of removing duplicates in the original - // llvm::Value input list as well. This is a useful optimization for - // reducing the size of the StackMap section. It has no other impact. - removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this, - FuncInfo.StatepointSpillMaps[SI.StatepointInstr]); - assert(SI.Bases.size() == SI.Ptrs.size() && - SI.Ptrs.size() == SI.GCRelocates.size()); - // Lower statepoint vmstate and gcstate arguments SmallVector LoweredMetaArgs; SmallVector MemRefs; diff --git a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s | FileCheck %s + +; Check that we can export values of "duplicated" gc.relocates without a crash +; "duplicate" here means maps to same SDValue. We previously had an +; optimization which tried to reduce number of spills/stackmap entries in such +; cases, but it incorrectly handled exports across basis block boundaries +; leading to a crash in this case. + +target triple = "x86_64-pc-linux-gnu" + +declare void @func() + +define i1 @test() gc "statepoint-example" { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: callq func +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: callq func +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* null, i32 addrspace(1)* null) + %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7) + %derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 8) + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %base, i32 addrspace(1)* %derived) + br label %next + +next: + %base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 7) + %derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 8) + %cmp1 = icmp eq i32 addrspace(1)* %base_reloc, null + %cmp2 = icmp eq i32 addrspace(1)* %derived_reloc, null + %cmp = and i1 %cmp1, %cmp2 + ret i1 %cmp +} + +; Showing redundant fill on first statepoint and spill/fill on second one +define i1 @test2(i32 addrspace(1)* %arg) gc "statepoint-example" { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq func +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: callq func +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: orq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: sete %al +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %arg, i32 addrspace(1)* %arg) + %base = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7) + %derived = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 8) + %safepoint_token2 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %base, i32 addrspace(1)* %derived) + br label %next + +next: + %base_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 7) + %derived_reloc = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token2, i32 7, i32 8) + %cmp1 = icmp eq i32 addrspace(1)* %base_reloc, null + %cmp2 = icmp eq i32 addrspace(1)* %derived_reloc, null + %cmp = and i1 %cmp1, %cmp2 + ret i1 %cmp +} + + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)