diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -810,6 +810,8 @@
   bool WasCopy = MI->isCopy();
   Register ImpReg;
 
+  bool UntieRegs = MI->getOpcode() == TargetOpcode::STATEPOINT;
+
   // Spill subregs if the target allows it.
   // We always want to spill subregs for stackmap/patchpoint pseudos.
   bool SpillSubRegs = TII.isSubregFoldable() ||
@@ -829,6 +831,9 @@
       continue;
     }
 
+    if (UntieRegs && MO.isTied())
+      MI->untieRegOperand(Idx);
+
     if (!SpillSubRegs && MO.getSubReg())
       return false;
     // We cannot fold a load instruction into a def.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -471,6 +471,7 @@
                                     ArrayRef<unsigned> Ops, int FrameIndex,
                                     const TargetInstrInfo &TII) {
   unsigned StartIdx = 0;
+  unsigned NumDefs = 0;
   switch (MI.getOpcode()) {
   case TargetOpcode::STACKMAP: {
     // StackMapLiveValues are foldable
@@ -486,16 +487,28 @@
   case TargetOpcode::STATEPOINT: {
     // For statepoints, fold deopt and gc arguments, but not call arguments.
     StartIdx = StatepointOpers(&MI).getVarIdx();
+    NumDefs = MI.getNumDefs();
     break;
   }
   default:
     llvm_unreachable("unexpected stackmap opcode");
   }
 
+  unsigned DefToFoldIdx = MI.getNumOperands();
+
   // Return false if any operands requested for folding are not foldable (not
   // part of the stackmap's live values).
   for (unsigned Op : Ops) {
-    if (Op < StartIdx)
+    if (Op < NumDefs)
+      DefToFoldIdx = Op;
+    else if (Op < StartIdx)
+      return nullptr;
+    // When called from regalloc (InlineSpiller), operands must be untied,
+    // and regalloc will take care of (re)loading the operand from memory.
+    // But when called from other places (e.g. the peephole pass),
+    // we cannot fold operands which are tied - callers are unaware they
+    // need to reload the destination register.
+    if (MI.getOperand(Op).isTied())
       return nullptr;
   }
 
@@ -505,11 +518,16 @@
 
   // No need to fold return, the meta data, and function arguments
   for (unsigned i = 0; i < StartIdx; ++i)
-    MIB.add(MI.getOperand(i));
+    if (i != DefToFoldIdx)
+      MIB.add(MI.getOperand(i));
 
-  for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
+  for (unsigned i = StartIdx, e = MI.getNumOperands(); i < e; ++i) {
     MachineOperand &MO = MI.getOperand(i);
+    unsigned TiedTo = e;
+    (void)MI.isRegTiedToDefOperand(i, &TiedTo);
+
     if (is_contained(Ops, i)) {
+      assert(TiedTo == e && "Cannot fold tied operands");
       unsigned SpillSize;
       unsigned SpillOffset;
       // Compute the spill slot size and offset.
@@ -523,9 +541,15 @@
       MIB.addImm(SpillSize);
       MIB.addFrameIndex(FrameIndex);
       MIB.addImm(SpillOffset);
-    }
-    else
+    } else {
       MIB.add(MO);
+      if (TiedTo < e) {
+        assert(TiedTo < NumDefs && "Bad tied operand");
+        if (TiedTo > DefToFoldIdx)
+          --TiedTo;
+        NewMI->tieOperands(TiedTo, NewMI->getNumOperands() - 1);
+      }
+    }
   }
   return NewMI;
 }
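The index arithmetic in the last hunk is the subtle part: when the operand being
folded is one of the statepoint's defs, that def is simply dropped from the new
instruction (the `i != DefToFoldIdx` filter), so every surviving def after it
shifts down by one slot, and any use that was tied to a later def must be
re-tied to the shifted index. Below is a standalone sketch of that renumbering,
under the assumption (true for InlineSpiller) that a single def is folded per
call; this is toy illustration code, not part of the patch:

    #include <cassert>

    // Given a use operand tied to def index TiedTo in the original
    // statepoint, return the def index it must be re-tied to after the
    // def at DefToFoldIdx has been folded away. All other defs are
    // copied to the new instruction in their original order.
    // (Toy model; assumes exactly one def is folded, as InlineSpiller does.)
    static unsigned remapTiedDef(unsigned TiedTo, unsigned NumDefs,
                                 unsigned DefToFoldIdx) {
      assert(TiedTo < NumDefs && "Bad tied operand");
      assert(TiedTo != DefToFoldIdx &&
             "a use tied to the folded def must itself have been untied");
      // Defs before the folded one keep their index; later defs shift down.
      return TiedTo > DefToFoldIdx ? TiedTo - 1 : TiedTo;
    }

    int main() {
      // A statepoint with 9 tied defs, folding def 3: uses tied to defs
      // 4..8 get re-tied to 3..7; defs 0..2 are unaffected.
      assert(remapTiedDef(2, 9, 3) == 2);
      assert(remapTiedDef(4, 9, 3) == 3);
      assert(remapTiedDef(8, 9, 3) == 7);
    }

InlineSpiller folds one operand per foldMemoryOperand call, so in the test
below three successive folds remove three of the nine defs, and the six
survivors end up renumbered (tied-def 0) through (tied-def 5), as the
STATEPOINT CHECK line shows.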
diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-folding.mir b/llvm/test/CodeGen/X86/statepoint-vreg-folding.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/statepoint-vreg-folding.mir
@@ -0,0 +1,198 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=greedy -o - %s | FileCheck %s
+
+--- |
+  ; ModuleID = 'folding.ll'
+  source_filename = "folding.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-pc-linux-gnu"
+
+  declare void @func()
+
+  define i32 @test_spill(i32 addrspace(1)* %arg00, i32 addrspace(1)* %arg01, i32 addrspace(1)* %arg02, i32 addrspace(1)* %arg03, i32 addrspace(1)* %arg04, i32 addrspace(1)* %arg05, i32 addrspace(1)* %arg06, i32 addrspace(1)* %arg07, i32 addrspace(1)* %arg08) gc "statepoint-example" {
+    %token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @func, i32 0, i32 0, i32 0, i32 0) [ "gc-live"(i32 addrspace(1)* %arg00, i32 addrspace(1)* %arg01, i32 addrspace(1)* %arg02, i32 addrspace(1)* %arg03, i32 addrspace(1)* %arg04, i32 addrspace(1)* %arg05, i32 addrspace(1)* %arg06, i32 addrspace(1)* %arg07, i32 addrspace(1)* %arg08) ]
+    %rel00 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 0, i32 0) ; (%arg00, %arg00)
+    %rel01 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 1, i32 1) ; (%arg01, %arg01)
+    %rel02 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 2, i32 2) ; (%arg02, %arg02)
+    %rel03 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 3, i32 3) ; (%arg03, %arg03)
+    %rel04 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 4, i32 4) ; (%arg04, %arg04)
+    %rel05 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 5, i32 5) ; (%arg05, %arg05)
+    %rel06 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 6, i32 6) ; (%arg06, %arg06)
+    %rel07 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 7, i32 7) ; (%arg07, %arg07)
+    %rel08 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 8, i32 8) ; (%arg08, %arg08)
+    %gep00 = getelementptr i32, i32 addrspace(1)* %rel00, i64 1
+    %gep01 = getelementptr i32, i32 addrspace(1)* %rel01, i64 2
+    %gep02 = getelementptr i32, i32 addrspace(1)* %rel02, i64 3
+    %gep03 = getelementptr i32, i32 addrspace(1)* %rel03, i64 4
+    %gep04 = getelementptr i32, i32 addrspace(1)* %rel04, i64 5
+    %gep05 = getelementptr i32, i32 addrspace(1)* %rel05, i64 6
+    %gep06 = getelementptr i32, i32 addrspace(1)* %rel06, i64 7
+    %gep07 = getelementptr i32, i32 addrspace(1)* %rel07, i64 8
+    %gep08 = getelementptr i32, i32 addrspace(1)* %rel08, i64 9
+    %val00 = load i32, i32 addrspace(1)* %gep00, align 4
+    %val01 = load i32, i32 addrspace(1)* %gep01, align 4
+    %sum01 = add i32 %val00, %val01
+    %val02 = load i32, i32 addrspace(1)* %gep02, align 4
+    %sum02 = add i32 %sum01, %val02
+    %val03 = load i32, i32 addrspace(1)* %gep03, align 4
+    %sum03 = add i32 %sum02, %val03
+    %val04 = load i32, i32 addrspace(1)* %gep04, align 4
+    %sum04 = add i32 %sum03, %val04
+    %val05 = load i32, i32 addrspace(1)* %gep05, align 4
+    %sum05 = add i32 %sum04, %val05
+    %val06 = load i32, i32 addrspace(1)* %gep06, align 4
+    %sum06 = add i32 %sum05, %val06
+    %val07 = load i32, i32 addrspace(1)* %gep07, align 4
+    %sum07 = add i32 %sum06, %val07
+    %val08 = load i32, i32 addrspace(1)* %gep08, align 4
+    %sum08 = add i32 %sum07, %val08
+    ret i32 %sum08
+  }
+
+  ; Function Attrs: nounwind readonly
+  declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32 immarg, i32 immarg) #0
+
+  declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 immarg, i32 immarg, void ()*, i32 immarg, i32 immarg, ...)
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+
+  attributes #0 = { nounwind readonly }
+  attributes #1 = { nounwind }
+
+...
+---
+name: test_spill
+alignment: 16
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+registers:
+  - { id: 0, class: gr64, preferred-register: '' }
+  - { id: 1, class: gr64, preferred-register: '' }
+  - { id: 2, class: gr64, preferred-register: '' }
+  - { id: 3, class: gr64, preferred-register: '' }
+  - { id: 4, class: gr64, preferred-register: '' }
+  - { id: 5, class: gr64, preferred-register: '' }
+  - { id: 6, class: gr64, preferred-register: '' }
+  - { id: 7, class: gr64, preferred-register: '' }
+  - { id: 8, class: gr64, preferred-register: '' }
+  - { id: 9, class: gr64, preferred-register: '' }
+  - { id: 10, class: gr64, preferred-register: '' }
+  - { id: 11, class: gr64, preferred-register: '' }
+  - { id: 12, class: gr64, preferred-register: '' }
+  - { id: 13, class: gr64, preferred-register: '' }
+  - { id: 14, class: gr64, preferred-register: '' }
+  - { id: 15, class: gr64, preferred-register: '' }
+  - { id: 16, class: gr64, preferred-register: '' }
+  - { id: 17, class: gr64, preferred-register: '' }
+  - { id: 18, class: gr32, preferred-register: '' }
+  - { id: 19, class: gr32, preferred-register: '' }
+  - { id: 20, class: gr32, preferred-register: '' }
+  - { id: 21, class: gr32, preferred-register: '' }
+  - { id: 22, class: gr32, preferred-register: '' }
+  - { id: 23, class: gr32, preferred-register: '' }
+  - { id: 24, class: gr32, preferred-register: '' }
+  - { id: 25, class: gr32, preferred-register: '' }
+  - { id: 26, class: gr32, preferred-register: '' }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%0' }
+  - { reg: '$rsi', virtual-reg: '%1' }
+  - { reg: '$rdx', virtual-reg: '%2' }
+  - { reg: '$rcx', virtual-reg: '%3' }
+  - { reg: '$r8', virtual-reg: '%4' }
+  - { reg: '$r9', virtual-reg: '%5' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 8
+  adjustsStack: false
+  hasCalls: true
+  stackProtector: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack:
+  - { id: 0, type: default, offset: 16, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 1, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+  - { id: 2, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+      isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+stack: []
+callSites: []
+constants: []
+machineFunctionInfo: {}
+body: |
+  bb.0 (%ir-block.0):
+    liveins: $rdi, $rsi, $rdx, $rcx, $r8, $r9
+
+    ; CHECK-LABEL: name: test_spill
+    ; CHECK: liveins: $rdi, $rsi, $rdx, $rcx, $r8, $r9
+    ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $r9 :: (store 8 into %stack.0)
+    ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $r8 :: (store 8 into %stack.1)
+    ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rcx :: (store 8 into %stack.2)
+    ; CHECK: [[R1:%[0-9]+]]:gr64 = COPY $rdx
+    ; CHECK: [[R2:%[0-9]+]]:gr64 = COPY $rsi
+    ; CHECK: [[R3:%[0-9]+]]:gr64 = COPY $rdi
+    ; CHECK: [[R4:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
+    ; CHECK: [[R5:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1)
+    ; CHECK: [[R6:%[0-9]+]]:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2, align 16)
+    ; CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK: [[R6]]:gr64, [[R5]]:gr64, [[R4]]:gr64, [[R1]]:gr64, [[R2]]:gr64, [[R3]]:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, [[R6]], [[R6]](tied-def 0), [[R5]], [[R5]](tied-def 1), [[R4]], [[R4]](tied-def 2), 1, 8, %stack.0, 0, 1, 8, %stack.0, 0, 1, 8, %stack.1, 0, 1, 8, %stack.1, 0, 1, 8, %stack.2, 0, 1, 8, %stack.2, 0, [[R1]], [[R1]](tied-def 3), [[R2]], [[R2]](tied-def 4), [[R3]], [[R3]](tied-def 5), csr_64, implicit-def $rsp, implicit-def $ssp :: (load store 8 on %stack.0), (load store 8 on %stack.1), (load store 8 on %stack.2)
+    ; CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    ; CHECK: [[RES:%[0-9]+]]:gr32 = MOV32rm [[R3]], 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R2]], 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R1]], 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1)
+    ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm]], 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1)
+    ; CHECK: [[MOV64rm1:%[0-9]+]]:gr64 = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm1]], 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1)
+    ; CHECK: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[MOV64rm2]], 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R4]], 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R5]], 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1)
+    ; CHECK: [[RES]]:gr32 = ADD32rm [[RES]], [[R6]], 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1)
+    ; CHECK: $eax = COPY [[RES]]
+    ; CHECK: RET 0, $eax
+    %12:gr64 = COPY $r9
+    %13:gr64 = COPY $r8
+    %14:gr64 = COPY $rcx
+    %15:gr64 = COPY $rdx
+    %16:gr64 = COPY $rsi
+    %17:gr64 = COPY $rdi
+    %11:gr64 = MOV64rm %fixed-stack.2, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.2, align 16)
+    %10:gr64 = MOV64rm %fixed-stack.1, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.1)
+    %9:gr64 = MOV64rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
+    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %9:gr64, %10:gr64, %11:gr64, %12:gr64, %13:gr64, %14:gr64, %15:gr64, %16:gr64, %17:gr64 = STATEPOINT 0, 0, 0, @func, 2, 0, 2, 0, 2, 0, %9, %9(tied-def 0), %10, %10(tied-def 1), %11, %11(tied-def 2), %12, %12(tied-def 3), %13, %13(tied-def 4), %14, %14(tied-def 5), %15, %15(tied-def 6), %16, %16(tied-def 7), %17, %17(tied-def 8), csr_64, implicit-def $rsp, implicit-def $ssp
+    ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+    %20:gr32 = MOV32rm %17, 1, $noreg, 4, $noreg :: (load 4 from %ir.gep00, addrspace 1)
+    %20:gr32 = ADD32rm %20, %16, 1, $noreg, 8, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep01, addrspace 1)
+    %20:gr32 = ADD32rm %20, %15, 1, $noreg, 12, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep02, addrspace 1)
+    %20:gr32 = ADD32rm %20, %14, 1, $noreg, 16, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep03, addrspace 1)
+    %20:gr32 = ADD32rm %20, %13, 1, $noreg, 20, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep04, addrspace 1)
+    %20:gr32 = ADD32rm %20, %12, 1, $noreg, 24, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep05, addrspace 1)
+    %20:gr32 = ADD32rm %20, %11, 1, $noreg, 28, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep06, addrspace 1)
+    %20:gr32 = ADD32rm %20, %10, 1, $noreg, 32, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep07, addrspace 1)
+    %20:gr32 = ADD32rm %20, %9, 1, $noreg, 36, $noreg, implicit-def dead $eflags :: (load 4 from %ir.gep08, addrspace 1)
+    $eax = COPY %20
+    RET 0, killed $eax
+
+...
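A note on reading the folded STATEPOINT in the CHECK line above: each spilled
value now appears as a `1, 8, %stack.N, 0` group, i.e. the
<IndirectMemRefOp, SpillSize, FrameIndex, SpillOffset> record that
foldPatchpoint emits (the size/frame-index/offset adds are visible in the last
TargetInstrInfo.cpp hunk; StackMaps::IndirectMemRefOp evidently prints as 1
here, just as the `2, 0` pairs are ConstantOp records). A toy decoder for that
flattened encoding, with illustrative stand-in types rather than LLVM's
MachineOperand:

    #include <cstdio>
    #include <vector>

    // Toy stand-in for a machine operand: an immediate or a frame index.
    struct Op {
      bool IsFI;      // true if this is a %stack.N frame-index operand
      long long Val;  // immediate value, or the slot number N
    };

    // Scan a statepoint operand tail and print every folded (spilled)
    // value, assuming the <IndirectMemRefOp(=1), Size, FrameIndex, Offset>
    // record layout produced by foldPatchpoint.
    static void printFoldedRecords(const std::vector<Op> &Tail) {
      for (size_t I = 0; I + 3 < Tail.size(); ++I) {
        if (!Tail[I].IsFI && Tail[I].Val == 1 && Tail[I + 2].IsFI) {
          std::printf("%lld-byte load from %%stack.%lld + %lld\n",
                      Tail[I + 1].Val, Tail[I + 2].Val, Tail[I + 3].Val);
          I += 3; // consume the rest of the record
        }
      }
    }

    int main() {
      // The two %stack.0 records from the CHECK line: "1, 8, %stack.0, 0"
      // appears twice because each GC pointer is listed both as derived
      // pointer and as its own base.
      std::vector<Op> Tail = {{false, 1}, {false, 8}, {true, 0}, {false, 0},
                              {false, 1}, {false, 8}, {true, 0}, {false, 0}};
      printFoldedRecords(Tail);
    }

This also explains the `(load store 8 on %stack.N)` memory operands on the
folded STATEPOINT: the statepoint both reads the spilled pointer and, because
the GC may move the object, writes the relocated value back to the same slot.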