diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -884,8 +884,8 @@ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); DAG.setNodeMemRefs(StatepointMCNode, MemRefs); - // For values lowered to tied-defs, create the virtual registers. Note that - // for simplicity, we *always* create a vreg even within a single block. + // For values lowered to tied-defs, create the virtual registers only if they + // are used in other blocks. Map block-local gc.relocates to an SDValue here. DenseMap VirtRegs; for (const auto *Relocate : SI.GCRelocates) { Value *Derived = Relocate->getDerivedPtr(); @@ -893,6 +893,12 @@ if (!LowerAsVReg.count(SD)) continue; + if (SI.StatepointInstr->getParent() == Relocate->getParent()) { + assert(LowerAsVReg.find(SD) != LowerAsVReg.end() && "not found"); + NodeMap[Relocate] = SDValue(StatepointMCNode, LowerAsVReg[SD]); + continue; + } + // Handle multiple gc.relocates of the same input efficiently. 
if (VirtRegs.count(SD)) continue; @@ -919,10 +925,12 @@ SDValue SDV = getValue(V); SDValue Loc = StatepointLowering.getLocation(SDV); + bool IsLocal = (Relocate->getParent() == StatepointInstr->getParent()); + RecordType Record; if (LowerAsVReg.count(SDV)) { Record.type = RecordType::VReg; - assert(VirtRegs.count(SDV)); + assert(IsLocal || VirtRegs.count(SDV)); Record.payload.Reg = VirtRegs[SDV]; } else if (Loc.getNode()) { Record.type = RecordType::Spill; @@ -1211,6 +1219,12 @@ assert(*IsManaged && "Non gc managed pointer relocated!"); #endif + if (NodeMap.find(&Relocate) != NodeMap.end()) { + assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() && + "Nonlocal gc.relocate already mapped"); + return; + } + const Value *DerivedPtr = Relocate.getDerivedPtr(); auto &RelocationMap = FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -322,14 +322,14 @@ ;CHECK-VREG: %1:gr64 = COPY $rsi ;CHECK-VREG: %0:gr64 = COPY $rdi ;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags -;CHECK-VREG: %5:gr64 = CMOV64rr %1, %0, 4, implicit $eflags -;CHECK-VREG: %6:gr32 = MOV32r0 implicit-def dead $eflags -;CHECK-VREG: %7:gr64 = SUBREG_TO_REG 0, killed %6, %subreg.sub_32bit -;CHECK-VREG: $rdi = COPY %7 -;CHECK-VREG: $rsi = COPY %5 -;CHECK-VREG: %3:gr64, %4:gr64 = STATEPOINT 10, 0, 2, @bar, $rdi, $rsi, 2, 0, 2, 0, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp +;CHECK-VREG: %3:gr64 = CMOV64rr %1, %0, 4, implicit $eflags +;CHECK-VREG: %4:gr32 = MOV32r0 implicit-def dead $eflags +;CHECK-VREG: %5:gr64 = SUBREG_TO_REG 0, killed %4, %subreg.sub_32bit +;CHECK-VREG: $rdi = COPY %5 +;CHECK-VREG: $rsi = COPY %3 +;CHECK-VREG: %6:gr64, %7:gr64 = STATEPOINT 10, 0, 2, @bar, $rdi, $rsi, 2, 0, 
2, 0, 2, 0, 2, 2, %1(tied-def 0), %0(tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp ;CHECK-VREG: TEST32rr %2, %2, implicit-def $eflags -;CHECK-VREG: %8:gr64 = CMOV64rr %3, %4, 4, implicit $eflags +;CHECK-VREG: %8:gr64 = CMOV64rr %6, killed %7, 4, implicit $eflags ;CHECK-VREG: $rax = COPY %8 ;CHECK-VREG: RET 0, $rax entry: @@ -342,14 +342,14 @@ ret i8 addrspace(1)* %res } -; Show that ISEL of gc.relocate used in other BB does generate extra COPY instruction. +; Check that ISEL of gc.relocate used in other BB does not generate extra COPY instruction. define i1 @test_cross_bb_reloc(i32 addrspace(1)* %a, i1 %external_cond) gc "statepoint-example" { ; CHECK-VREG_LABEL: test_cross_bb_reloc: ; CHECK-VREG: bb.0.entry: ; CHECK-VREG: [[VREG:%[^ ]+]]:gr64 = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, %2(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al -; CHECK-VREG: [[EXTRA:%[^ ]+]]:gr64 = COPY [[VREG]] +; CHECK-VREG-NOT: COPY [[VREG]] ; CHECK-VREG: bb.1.left: -; CHECK-VREG: $rdi = COPY [[EXTRA]] +; CHECK-VREG: $rdi = COPY [[VREG]] ; CHECK-VREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp ; CHECK-VREG: $al = COPY %1 ; CHECK-VREG: RET 0, $al diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-unlimited-tied-opnds.ll @@ -12,43 +12,43 @@ i32 addrspace(1)* %arg12, i32 addrspace(1)* %arg13, i32 addrspace(1)* %arg14, i32 addrspace(1)* %arg15, i32 addrspace(1)* %arg16, i32 addrspace(1)* %arg17 ) gc "statepoint-example" { ; CHECK-VREG-LABEL: test_spill -; CHECK-VREG: %18:gr64 = COPY $r9 -; CHECK-VREG: %19:gr64 = COPY $r8 -; CHECK-VREG: %20:gr64 = COPY $rcx -; CHECK-VREG: %21:gr64 = COPY $rdx -; CHECK-VREG: %22:gr64 = COPY $rsi -; 
CHECK-VREG: %23:gr64 = COPY $rdi -; CHECK-VREG: %17:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) -; CHECK-VREG: %16:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) -; CHECK-VREG: %15:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) -; CHECK-VREG: %14:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) -; CHECK-VREG: %13:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) -; CHECK-VREG: %12:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) -; CHECK-VREG: %11:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) -; CHECK-VREG: %10:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) -; CHECK-VREG: %9:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) -; CHECK-VREG: %8:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) -; CHECK-VREG: %7:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) -; CHECK-VREG: %6:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) -; CHECK-VREG: %6:gr64, %7:gr64, %8:gr64, %9:gr64, %10:gr64, %11:gr64, %12:gr64, %13:gr64, %14:gr64, %15:gr64, %16:gr64, %17:gr64, %18:gr64, %19:gr64, %20:gr64, %21:gr64, %22:gr64, %23:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, %6(tied-def 0), %7(tied-def 1), %8(tied-def 2), %9(tied-def 3), %10(tied-def 4), %11(tied-def 5), %12(tied-def 6), %13(tied-def 7), %14(tied-def 8), %15(tied-def 9), %16(tied-def 10), %17(tied-def 11), %18(tied-def 12), %19(tied-def 13), %20(tied-def 14), %21(tied-def 15), %22(tied-def 16), %23(tied-def 17), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp -; CHECK-VREG: %38:gr32 = MOV32rm %23, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %22, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %21, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %20, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %19, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %18, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %17, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %16, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %15, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep08, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %14, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %13, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %12, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %11, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %10, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) 
-; CHECK-VREG: %38:gr32 = ADD32rm %38, %9, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %8, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %7, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) -; CHECK-VREG: %38:gr32 = ADD32rm %38, %6, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) +; CHECK-VREG: %30:gr64 = COPY $r9 +; CHECK-VREG: %31:gr64 = COPY $r8 +; CHECK-VREG: %32:gr64 = COPY $rcx +; CHECK-VREG: %33:gr64 = COPY $rdx +; CHECK-VREG: %34:gr64 = COPY $rsi +; CHECK-VREG: %35:gr64 = COPY $rdi +; CHECK-VREG: %29:gr64 = MOV64rm %fixed-stack.11, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.11, align 16) +; CHECK-VREG: %28:gr64 = MOV64rm %fixed-stack.10, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.10) +; CHECK-VREG: %27:gr64 = MOV64rm %fixed-stack.9, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.9, align 16) +; CHECK-VREG: %26:gr64 = MOV64rm %fixed-stack.8, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.8) +; CHECK-VREG: %25:gr64 = MOV64rm %fixed-stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.7, align 16) +; CHECK-VREG: %24:gr64 = MOV64rm %fixed-stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.6) +; CHECK-VREG: %23:gr64 = MOV64rm %fixed-stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.5, align 16) +; CHECK-VREG: %22:gr64 = MOV64rm %fixed-stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.4) +; CHECK-VREG: %21:gr64 = MOV64rm %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +; CHECK-VREG: %20:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +; CHECK-VREG: %19:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) 
+; CHECK-VREG: %18:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) +; CHECK-VREG: %18:gr64, %19:gr64, %20:gr64, %21:gr64, %22:gr64, %23:gr64, %24:gr64, %25:gr64, %26:gr64, %27:gr64, %28:gr64, %29:gr64, %30:gr64, %31:gr64, %32:gr64, %33:gr64, %34:gr64, %35:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, 2, 18, %18(tied-def 0), %19(tied-def 1), %20(tied-def 2), %21(tied-def 3), %22(tied-def 4), %23(tied-def 5), %24(tied-def 6), %25(tied-def 7), %26(tied-def 8), %27(tied-def 9), %28(tied-def 10), %29(tied-def 11), %30(tied-def 12), %31(tied-def 13), %32(tied-def 14), %33(tied-def 15), %34(tied-def 16), %35(tied-def 17), 2, 0, 2, 18, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, csr_64, implicit-def $rsp, implicit-def $ssp +; CHECK-VREG: %38:gr32 = MOV32rm %35, 1, $noreg, 4, $noreg :: (load (s32) from %ir.gep00, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %34, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep01, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %33, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep02, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %32, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep03, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %31, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep04, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %30, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep05, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %29, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep06, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %28, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep07, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %27, 1, $noreg, 36, $noreg, implicit-def dead 
$eflags :: (load (s32) from %ir.gep08, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %26, 1, $noreg, 40, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep09, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %25, 1, $noreg, 44, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep10, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %24, 1, $noreg, 48, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep11, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %23, 1, $noreg, 52, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep12, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %22, 1, $noreg, 56, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep13, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %21, 1, $noreg, 60, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep14, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %20, 1, $noreg, 64, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep15, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %19, 1, $noreg, 68, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep16, addrspace 1) +; CHECK-VREG: %38:gr32 = ADD32rm %38, %18, 1, $noreg, 72, $noreg, implicit-def dead $eflags :: (load (s32) from %ir.gep17, addrspace 1) ; CHECK-VREG: $eax = COPY %38 ; CHECK-PREG: renamable $rbx = COPY $r9 diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -434,20 +434,20 @@ ; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp14: -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movss %xmm0, (%rsp) -; 
CHECK-NEXT: nopl 8(%rax,%rax) -; CHECK-NEXT: .Ltmp15: +; CHECK-NEXT: movss %xmm0, {{[-0-9]*}}(%rsp) ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: nopl 8(%rax,%rax) +; CHECK-NEXT: .Ltmp15: ; CHECK-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: movss %xmm0, (%rsp) +; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; CHECK-NEXT: # xmm1 = mem[0],zero +; CHECK-NEXT: movsd %xmm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: nopl 8(%rax,%rax) ; CHECK-NEXT: .Ltmp16: ; CHECK-NEXT: xorl %eax, %eax