diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp --- a/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -107,7 +107,7 @@ SlotIndex OrigIdx, SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); - UseIdx = UseIdx.getRegSlot(true); + UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true)); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr51516.mir @@ -0,0 +1,208 @@ +# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s + +# Check that we spill %31 and do not rematerialize it since the use operand +# of ADDXri is killed by the STRXui in this block. + +# CHECK-LABEL: name: test +# CHECK: bb.17: +# CHECK: STRXui +# CHECK: LDRXui +# CHECK: bb.18: + +--- +name: test +tracksRegLiveness: true +stack: + - { id: 0, type: variable-sized, offset: 0, alignment: 32, + stack-id: default } +body: | + bb.0.entry: + successors: %bb.2(0x40000000), %bb.1(0x40000000) + liveins: $x0 + + %22:gpr64common = COPY $x0 + CBZW $wzr, %bb.2 + + bb.1: + successors: %bb.3(0x80000000) + + %1:gpr64 = COPY $xzr + B %bb.3 + + bb.2: + successors: %bb.3(0x80000000) + + $x0 = IMPLICIT_DEF + %1:gpr64 = COPY $x0 + + bb.3: + successors: %bb.4(0x30000000), %bb.5(0x50000000) + + %2:gpr64common = COPY %1 + CBNZX %1, %bb.5 + B %bb.4 + + bb.4: + successors: %bb.5(0x80000000) + + %31:gpr64common = SUBXri $sp, 288, 0 + $sp = ANDXri %31, 7930 + %2:gpr64common = COPY $xzr + + bb.5: + successors: %bb.6(0x80000000) + + %9:gpr64common = COPY $xzr + %35:gpr64sp = ADDXri %2, 32, 0 + %4:gpr64common = UBFMXri %2, 3, 63 + %37:gpr64 = MOVi64imm -506381209866536712 + %38:gpr32 = MOVi32imm -202116109 + %39:gpr64common = nuw ADDXri %22, 836, 0 + %41:gpr64common = nuw ADDXri %22, 648, 0 + STRXui %37, %4, 1 + STRWui %38, %4, 8 + %8:gpr64common = UBFMXri %39, 3, 63 + %7:gpr64common = UBFMXri %41, 3, 63 + %47:gpr64 = MOVi64imm 0 + + bb.6: + successors: %bb.8(0x30000000), %bb.7(0x50000000) + + %44:gpr64common = ADDXrr %22, %9 + %10:gpr64common = ADDXri %44, 648, 0 + %11:gpr64common = ANDSXri %10, 4098, implicit-def $nzcv + Bcc 0, %bb.8, implicit killed $nzcv + B %bb.7 + + bb.7: + successors: %bb.8(0x80000000) + + BL 0, csr_aarch64_aapcs, implicit-def dead $lr + + bb.8: + successors: %bb.9(0x04000000), %bb.24(0x7c000000) + + CBNZW $wzr, %bb.24 + B %bb.9 + + bb.9: + successors: %bb.10(0x7ffff800), %bb.11(0x00000800) + + %55:gpr64common = ADDXrr %22, %9 + %56:gpr64sp = ADDXri %55, 648, 0 + CBZX %11, %bb.10 + B %bb.11 + + bb.10: + successors: %bb.11(0x80000000) + + $x0 = ADDXri %55, 648, 0 + $x2 = IMPLICIT_DEF + $w1 = COPY $wzr + $x1 = nuw ADDXri %35, 32, 0 + BL 0, csr_aarch64_aapcs, implicit-def dead $lr + %66:gpr64sp = nuw ADDXri %35, 48, 0 + $x0 = ADDXri %55, 696, 0 + $x2 = IMPLICIT_DEF + $x1 = COPY %66 + + bb.11: + successors: %bb.15(0x7ffff800), %bb.12(0x00000800) + + CBZX %11, %bb.15 + B %bb.12 + + bb.12: + successors: %bb.13(0x00000000), %bb.14(0x80000000) + + CBNZW $wzr, %bb.14 + B %bb.13 + + bb.13: + successors: + + bb.14: + successors: %bb.18(0x80000000) + + $x1 = COPY %56 + B %bb.18 + + bb.15: + successors: %bb.16(0x00000000), %bb.17(0x80000000) + + %76:gpr32 = LDRBBui %7, 0 + CBZW %76, %bb.17 + B %bb.16 + + bb.16: + successors: + + %74:gpr64common = ADDXrr %22, %9 + %15:gpr64sp = ADDXri %74, 648, 0 + $x0 = COPY %15 + + bb.17: + successors: %bb.18(0x80000000) + + STRXui %22, %55, 81 + + bb.18: + successors: %bb.19(0x80000000), %bb.20(0x00000000) + + %79:gpr32 = LDRBBui %8, 0 + CBNZW %79, %bb.20 + B %bb.19 + + bb.19: + successors: %bb.21(0x80000000), %bb.20(0x00000000) + + %80:gpr32 = MOVi32imm 1 + CBNZW %80, %bb.21 + B %bb.20 + + bb.20: + successors: + + %16:gpr64sp = ADDXri %22, 836, 0 + $x0 = COPY %16 + + bb.21: + successors: %bb.24(0x00000000), %bb.22(0x80000000) + + CBZW $wzr, %bb.24 + + bb.22: + successors: %bb.26(0x80000000) + + B %bb.26 + + bb.24: + successors: %bb.25(0x04000000), %bb.6(0x7c000000) + + %8:gpr64common = ADDXri %8, 24, 0 + %7:gpr64common = ADDXri %7, 24, 0 + CBNZW $wzr, %bb.6 + + bb.25: + successors: %bb.26(0x80000000) + + %56:gpr64sp = COPY $xzr + + bb.26: + successors: %bb.28(0x50000000), %bb.27(0x30000000) + + undef %83.sub_32:gpr64 = MOVi32imm 1172321806 + STRXui %83, %2, 0 + CBNZX %1, %bb.28 + B %bb.27 + + bb.27: + successors: %bb.28(0x80000000) + + STRXui $xzr, %4, 0 + + bb.28: + $x0 = COPY %56 + RET_ReallyLR implicit $x0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/pr51516.mir b/llvm/test/CodeGen/AMDGPU/pr51516.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/pr51516.mir @@ -0,0 +1,114 @@ +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -start-before=machine-scheduler -stop-after=virtregrewriter,1 -o - %s | FileCheck -check-prefix=GCN %s + +# Check that %3 was not rematerialized before the last store since its operand %1 +# is killed by that store. + +# GCN-LABEL: name: global_sextload_v32i32_to_v32i64 +# GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) +# GCN: GLOBAL_STORE_DWORDX4_SADDR killed renamable $vgpr27, killed renamable $vgpr19_vgpr20_vgpr21_vgpr22, killed renamable $sgpr0_sgpr1, 16, 0, implicit $exec, implicit killed renamable $vgpr0 + +--- +name: global_sextload_v32i32_to_v32i64 +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr4_sgpr5 + + %0:sgpr_64(p4) = COPY $sgpr4_sgpr5 + %1:vgpr_32 = COPY $m0 + %2:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 0, 0 + %3:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec + %4:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 112, 0, implicit $exec :: (load (s128)) + %5:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 96, 0, implicit $exec, implicit %1 :: (load (s128)) + %6:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 80, 0, implicit $exec :: (load (s128)) + %7:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 64, 0, implicit $exec :: (load (s128)) + %8:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 48, 0, implicit $exec :: (load (s128)) + %9:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 32, 0, implicit $exec :: (load (s128)) + %10:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 16, 0, implicit $exec :: (load (s128)) + %11:vreg_128 = GLOBAL_LOAD_DWORDX4_SADDR %2.sub2_sub3, %3, 0, 0, implicit $exec :: (load (s128)) + undef %12.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub3, implicit $exec + %12.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub2, implicit $exec + undef %13.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub1, implicit $exec + %13.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %11.sub0, implicit $exec + undef %14.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub3, implicit $exec + %14.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub2, implicit $exec + undef %15.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub1, implicit $exec + %15.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %10.sub0, implicit $exec + undef %16.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub3, implicit $exec + %16.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub2, implicit $exec + undef %17.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub1, implicit $exec + %17.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %9.sub0, implicit $exec + undef %18.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub3, implicit $exec + %18.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub2, implicit $exec + undef %19.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub1, implicit $exec + %19.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %8.sub0, implicit $exec + undef %20.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub3, implicit $exec + %20.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub2, implicit $exec + undef %21.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub1, implicit $exec + %21.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %7.sub0, implicit $exec + undef %22.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub3, implicit $exec + %22.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub2, implicit $exec + undef %23.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub1, implicit $exec + %23.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %6.sub0, implicit $exec + undef %24.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub3, implicit $exec + %24.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub2, implicit $exec + undef %25.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub1, implicit $exec + %25.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %5.sub0, implicit $exec + undef %26.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub3, implicit $exec + %26.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub2, implicit $exec + undef %27.sub3:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub1, implicit $exec + %27.sub1:vreg_128 = V_ASHRREV_I32_e32 31, %4.sub0, implicit $exec + %27.sub0:vreg_128 = COPY %4.sub0 + %27.sub2:vreg_128 = COPY %4.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %27, %2.sub0_sub1, 224, 0, implicit $exec + %26.sub0:vreg_128 = COPY %4.sub2 + %26.sub2:vreg_128 = COPY %4.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %26, %2.sub0_sub1, 240, 0, implicit $exec + %25.sub0:vreg_128 = COPY %5.sub0 + %25.sub2:vreg_128 = COPY %5.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %25, %2.sub0_sub1, 192, 0, implicit $exec + %24.sub0:vreg_128 = COPY %5.sub2 + %24.sub2:vreg_128 = COPY %5.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %24, %2.sub0_sub1, 208, 0, implicit $exec + %23.sub0:vreg_128 = COPY %6.sub0 + %23.sub2:vreg_128 = COPY %6.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %23, %2.sub0_sub1, 160, 0, implicit $exec + %22.sub0:vreg_128 = COPY %6.sub2 + %22.sub2:vreg_128 = COPY %6.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %22, %2.sub0_sub1, 176, 0, implicit $exec + %21.sub0:vreg_128 = COPY %7.sub0 + %21.sub2:vreg_128 = COPY %7.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %21, %2.sub0_sub1, 128, 0, implicit $exec + %20.sub0:vreg_128 = COPY %7.sub2 + %20.sub2:vreg_128 = COPY %7.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %20, %2.sub0_sub1, 144, 0, implicit $exec + %19.sub0:vreg_128 = COPY %8.sub0 + %19.sub2:vreg_128 = COPY %8.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %19, %2.sub0_sub1, 96, 0, implicit $exec + %18.sub0:vreg_128 = COPY %8.sub2 + %18.sub2:vreg_128 = COPY %8.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %18, %2.sub0_sub1, 112, 0, implicit $exec + %17.sub0:vreg_128 = COPY %9.sub0 + %17.sub2:vreg_128 = COPY %9.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %17, %2.sub0_sub1, 64, 0, implicit $exec + %16.sub0:vreg_128 = COPY %9.sub2 + %16.sub2:vreg_128 = COPY %9.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %16, %2.sub0_sub1, 80, 0, implicit $exec + %15.sub0:vreg_128 = COPY %10.sub0 + %15.sub2:vreg_128 = COPY %10.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %15, %2.sub0_sub1, 32, 0, implicit $exec + %14.sub0:vreg_128 = COPY %10.sub2 + %14.sub2:vreg_128 = COPY %10.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %14, %2.sub0_sub1, 48, 0, implicit $exec + %13.sub0:vreg_128 = COPY %11.sub0 + %13.sub2:vreg_128 = COPY %11.sub1 + GLOBAL_STORE_DWORDX4_SADDR %3, %13, %2.sub0_sub1, 0, 0, implicit $exec + %12.sub0:vreg_128 = COPY %11.sub2 + %12.sub2:vreg_128 = COPY %11.sub3 + GLOBAL_STORE_DWORDX4_SADDR %3, %12, %2.sub0_sub1, 16, 0, implicit $exec, implicit killed %1 + S_ENDPGM 0 + +...