diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -603,7 +603,7 @@ // Clear state StatepointLowering.startNewStatepoint(*this); assert(SI.Bases.size() == SI.Ptrs.size() && - SI.Ptrs.size() == SI.GCRelocates.size()); + SI.Ptrs.size() <= SI.GCRelocates.size()); #ifndef NDEBUG for (auto *Reloc : SI.GCRelocates) @@ -823,10 +823,29 @@ ISP.getNumCallArgs(), ActualCallee, ISP.getActualReturnType(), false /* IsPatchPoint */); + // There may be duplication in the gc.relocate list; such as two copies of + // each relocation on normal and exceptional path for an invoke. We only + // need to spill once and record one copy in the stackmap, but we need to + // reload once per gc.relocate. (Deduping gc.relocates is trickier and best + // handled as a CSE problem elsewhere.) + // TODO: There are a couple of major stackmap size optimizations we could do + // here if we wished. + // 1) If we've encountered a derived pair {B, D}, we don't need to actually + // record {B,B} if it's seen later. + // 2) Due to rematerialization, actual derived pointers are somewhat rare; + // given that, we could change the format to record base pointer relocations + // separately with half the space. This would require a format rev and a + // fairly major rework of the STATEPOINT node though. 
+ SmallSet<SDValue, 8> Seen; for (const GCRelocateInst *Relocate : ISP.getRelocates()) { SI.GCRelocates.push_back(Relocate); - SI.Bases.push_back(Relocate->getBasePtr()); - SI.Ptrs.push_back(Relocate->getDerivedPtr()); + + // Only the derived pointer's SDValue is needed to detect duplicates. + SDValue DerivedSD = getValue(Relocate->getDerivedPtr()); + if (Seen.insert(DerivedSD).second) { + SI.Bases.push_back(Relocate->getBasePtr()); + SI.Ptrs.push_back(Relocate->getDerivedPtr()); + } } SI.GCArgs = ArrayRef(ISP.gc_args_begin(), ISP.gc_args_end()); diff --git a/llvm/test/CodeGen/X86/statepoint-stackmap-size.ll b/llvm/test/CodeGen/X86/statepoint-stackmap-size.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/statepoint-stackmap-size.ll @@ -0,0 +1,22 @@ +; RUN: llc -verify-machineinstrs < %s | fgrep -A 10000 .llvm_stackmaps | wc -l | FileCheck %s + +; Without removal of duplicate entries, the size is 62 lines +; CHECK: 50 + +target triple = "x86_64-pc-linux-gnu" + +declare void @func() + +define i1 @test1(i32 addrspace(1)* %arg) gc "statepoint-example" { +entry: + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %arg) + %reloc1 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7) + %reloc2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %safepoint_token, i32 7, i32 7) + %cmp1 = icmp eq i32 addrspace(1)* %reloc1, null + %cmp2 = icmp eq i32 addrspace(1)* %reloc2, null + %cmp = and i1 %cmp1, %cmp2 + ret i1 %cmp +} + +declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) +declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)