diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -869,9 +869,11 @@ // Compute return values. Provide a glue output since we consume one as // input. This allows someone else to chain off us as needed. SmallVector NodeTys; + DenseMap Value2Res; for (auto SD : LoweredGCArgs) { if (!LowerAsVReg.count(SD)) continue; + Value2Res[SD] = NodeTys.size(); NodeTys.push_back(SD.getValueType()); } LLVM_DEBUG(dbgs() << "Statepoint has " << NodeTys.size() << " results\n"); @@ -884,8 +886,8 @@ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); DAG.setNodeMemRefs(StatepointMCNode, MemRefs); - // For values lowered to tied-defs, create the virtual registers. Note that - // for simplicity, we *always* create a vreg even within a single block. + // For values lowered to tied-defs, create the virtual registers if used + // in other blocks. For local gc.relocate uses map them to SDValue here. DenseMap VirtRegs; for (const auto *Relocate : SI.GCRelocates) { Value *Derived = Relocate->getDerivedPtr(); @@ -893,6 +895,12 @@ if (!LowerAsVReg.count(SD)) continue; + if (SI.StatepointInstr->getParent() == Relocate->getParent()) { + assert(Value2Res.find(SD) != Value2Res.end() && "not found"); + NodeMap[Relocate] = SDValue(StatepointMCNode, Value2Res[SD]); + continue; + } + // Handle multiple gc.relocates of the same input efficiently. if (VirtRegs.count(SD)) continue; @@ -919,10 +927,12 @@ SDValue SDV = getValue(V); SDValue Loc = StatepointLowering.getLocation(SDV); + bool IsLocal = (Relocate->getParent() == StatepointInstr->getParent()); + RecordType Record; if (LowerAsVReg.count(SDV)) { Record.type = RecordType::VReg; - assert(VirtRegs.count(SDV)); + assert(IsLocal || VirtRegs.count(SDV)); Record.payload.Reg = VirtRegs[SDV]; } else if (Loc.getNode()) { Record.type = RecordType::Spill; @@ -1211,6 +1221,12 @@ assert(*IsManaged && "Non gc managed pointer relocated!"); #endif + if (NodeMap.find(&Relocate) != NodeMap.end()) { + assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() && + "Nonlocal gc.relocate already mapped"); + return; + } + const Value *DerivedPtr = Relocate.getDerivedPtr(); auto &RelocationMap = FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -322,14 +322,14 @@ ;CHECK-VREG: %1:gr64 = COPY $rsi ;CHECK-VREG: %0:gr64 = COPY $rdi ;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags -;CHECK-VREG: %5:gr64 = CMOV64rr %1, %0, 4, implicit $eflags -;CHECK-VREG: %6:gr32 = MOV32r0 implicit-def dead $eflags -;CHECK-VREG: %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit -;CHECK-VREG: $rdi = COPY %7 -;CHECK-VREG: $rsi = COPY %5 -;CHECK-VREG: %3:gr64, %4:gr64 = STATEPOINT 10, 0, 2, @bar, $rdi, $rsi, 2, 0, 2, 0, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +;CHECK-VREG: %3:gr64 = CMOV64rr %1, %0, 4, implicit $eflags +;CHECK-VREG: %4:gr32 = MOV32r0 implicit-def dead $eflags +;CHECK-VREG: %5:gr64 = SUBREG_TO_REG 0, killed %4, %subreg.sub_32bit +;CHECK-VREG: $rdi = COPY %5 +;CHECK-VREG: $rsi = COPY %3 +;CHECK-VREG: %6:gr64, %7:gr64 = STATEPOINT 10, 0, 2, @bar, $rdi, $rsi, 2, 0, 2, 0, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp ;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags -;CHECK-VREG: %8:gr64 = CMOV64rr %3, %4, 4, implicit $eflags +;CHECK-VREG: %8:gr64 = CMOV64rr %6, killed %7, 4, implicit $eflags ;CHECK-VREG: $rax = COPY %8 ;CHECK-VREG: RET 0, $rax entry: @@ -342,14 +342,14 @@ ret i8 addrspace(1)* %res } -; Show that ISEL of gc.relocate used in other BB does generate extra COPY instruction. +; Check that ISEL of gc.relocate used in other BB does not generate extra COPY instruction. define i1 @test_cross_bb_reloc(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint-example" { ; CHECK-VREG_LABEL: test_cross_bb_reloc: ; CHECK-VREG: bb.0.entry: ; CHECK-VREG: [[VREG:%[^ ]+]]:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, %2(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al -; CHECK-VREG: [[EXTRA:%[^ ]+]]:gr64 = COPY [[VREG]] +; CHECK-VREG-NOT: COPY [[VREG]] ; CHECK-VREG: bb.1.left: -; CHECK-VREG: $rdi = COPY [[EXTRA]] +; CHECK-VREG: $rdi = COPY [[VREG]] ; CHECK-VREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK-VREG: $al = COPY %1 ; CHECK-VREG: RET 0, $al diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll @@ -12,43 +12,43 @@ i32 addrspace(1)* %arg12, i32 addrspace(1)* %arg13, i32 addrspace(1)* %arg14, i32 addrspace(1)* %arg15, i32 addrspace(1)* %arg16, i32 addrspace(1)* %arg17 ) gc "statepoint-example" { ; CHECK-VREG-LABEL: test_spill -; CHECK-VREG: %18:gr64 = COPY $r9 -; CHECK-VREG: %19:gr64 = COPY $r8 -; CHECK-VREG: %20:gr64 = COPY $rcx -; CHECK-VREG: %21:gr64 = COPY $rdx -; CHECK-VREG: %22:gr64 = COPY $rsi -; CHECK-VREG: %23:gr64 = COPY $rdi -; CHECK-VREG: %17:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) -; CHECK-VREG: %16:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) -; CHECK-VREG: %15:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) -; CHECK-VREG: %14:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) -; CHECK-VREG: %13:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) -; CHECK-VREG: %12:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) -; CHECK-VREG: %11:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) -; CHECK-VREG: %10:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) -; CHECK-VREG: %9:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) -; CHECK-VREG: %8:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) -; CHECK-VREG: %7:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) -; CHECK-VREG: %6:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) -; CHECK-VREG: %6:gr64, %7:gr64, %8:gr64, %9:gr64, %10:gr64, %11:gr64, %12:gr64, %13:gr64, %14:gr64, %15:gr64, %16:gr64, %17:gr64, %18:gr64, %19:gr64, %20:gr64, %21:gr64, %22:gr64, %23:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, %6(tied-def 0), %7(tied-def 1), %8(tied-def 2), %9(tied-def 3), %10(tied-def 4), %11(tied-def 5), %12(tied-def 6), %13(tied-def 7), %14(tied-def 8), %15(tied-def 9), %16(tied-def 10), %17(tied-def 11), %18(tied-def 12), %19(tied-def 13), %20(tied-def 14), %21(tied-def 15), %22(tied-def 16), %23(tied-def 17), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp -; CHECK-VREG: %38:gr32 = MOV32rm %23, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %22, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %21, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %20, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %19, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %18, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %17, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %16, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %15, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %14, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %13, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %12, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %11, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %10, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %9, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %8, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %7, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %6, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) +; CHECK-VREG: %30:gr64 = COPY $r9 +; CHECK-VREG: %31:gr64 = COPY $r8 +; CHECK-VREG: %32:gr64 = COPY $rcx +; CHECK-VREG: %33:gr64 = COPY $rdx +; CHECK-VREG: %34:gr64 = COPY $rsi +; CHECK-VREG: %35:gr64 = COPY $rdi +; CHECK-VREG: %29:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) +; CHECK-VREG: %28:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) +; CHECK-VREG: %27:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) +; CHECK-VREG: %26:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) +; CHECK-VREG: %25:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) +; CHECK-VREG: %24:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) +; CHECK-VREG: %23:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) +; CHECK-VREG: %22:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) +; CHECK-VREG: %21:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK-VREG: %20:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +; CHECK-VREG: %19:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) +; CHECK-VREG: %18:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) +; CHECK-VREG: %18:gr64, %19:gr64, %20:gr64, %21:gr64, %22:gr64, %23:gr64, %24:gr64, %25:gr64, %26:gr64, %27:gr64, %28:gr64, %29:gr64, %30:gr64, %31:gr64, %32:gr64, %33:gr64, %34:gr64, %35:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, %18(tied-def 0), %19(tied-def 1), %20(tied-def 2), %21(tied-def 3), %22(tied-def 4), %23(tied-def 5), %24(tied-def 6), %25(tied-def 7), %26(tied-def 8), %27(tied-def 9), %28(tied-def 10), %29(tied-def 11), %30(tied-def 12), %31(tied-def 13), %32(tied-def 14), %33(tied-def 15), %34(tied-def 16), %35(tied-def 17), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK-VREG: %38:gr32 = MOV32rm %35, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %34, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %33, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %32, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %31, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %30, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %29, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %28, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %27, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %26, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %25, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %24, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %23, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %22, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %21, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %20, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %19, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %18, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) ; CHECK-VREG: $eax = COPY %38 ; CHECK-PREG: renamable $rbx = COPY $r9 diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -434,20 +434,20 @@ ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp14: -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movss %xmm0, (%rsp) -; CHECK-NEXT: nopl 8(%rax,%rax) -; CHECK-NEXT: .Ltmp15: +; CHECK-NEXT: movss %xmm0, {{[-0-9]*}}(%rsp) ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp15: ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp16: ; CHECK-NEXT: xorl %eax, %eax