Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2154,8 +2154,27 @@ } if (NewOpc != -1) { + // removeOperand doesn't fixup tied operand indexes as it goes, so + // it asserts. Untie vdst_in for now and retie them afterwards. + int VDstIn = AMDGPU::getNamedOperandIdx(Opc, + AMDGPU::OpName::vdst_in); + bool TiedVDst = VDstIn != -1 && + MI->getOperand(VDstIn).isReg() && + MI->getOperand(VDstIn).isTied(); + if (TiedVDst) + MI->untieRegOperand(VDstIn); + MI->removeOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr)); + + if (TiedVDst) { + int NewVDst = + AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst); + int NewVDstIn = + AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in); + assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!"); + MI->tieOperands(NewVDst, NewVDstIn); + } MI->setDesc(TII->get(NewOpc)); return; } Index: llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.ll @@ -0,0 +1,29 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefix=GCN %s + +%struct0 = type { [4224 x %type.i16] } +%type.i16 = type { i16 } +@_ZZN0 = external hidden addrspace(3) global %struct0, align 8 + +; GCN-LABEL: tied_operand_test: +; GCN: ; %bb.0: ; %entry +; GCN: scratch_load_d16_hi_b16 [[LDRESULT:v[0-9]+]], off, off offset:4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ds_store_b32 v{{[0-9]+}}, [[LDRESULT]] offset:8 +; GCN-NEXT: s_endpgm +define protected amdgpu_kernel void @tied_operand_test(i1 %c1, i1 %c2, i32 %val) { +entry: + %scratch0 = alloca i16, align 4, addrspace(5) + %scratch1 = alloca i16, align 4, addrspace(5) + %first = select i1 %c1, i16 addrspace(5)* %scratch0, i16 addrspace(5)* 
%scratch1 + %spec.select = select i1 %c2, i16 addrspace(5)* %first, i16 addrspace(5)* %scratch0 + %dead.load = load i16, i16 addrspace(5)* %spec.select, align 2 + %scratch0.load = load i16, i16 addrspace(5)* %scratch0, align 4 + %add4 = add nuw nsw i32 %val, 4 + %addr0 = getelementptr inbounds %struct0, %struct0 addrspace(3)* bitcast (%struct0 addrspace(3)* @_ZZN0 to %struct0 addrspace(3)*), i32 0, i32 0, i32 %add4, i32 0 + store i16 123, i16 addrspace(3)* %addr0, align 2 + %add5 = add nuw nsw i32 %val, 5 + %addr1 = getelementptr inbounds %struct0, %struct0 addrspace(3)* bitcast (%struct0 addrspace(3)* @_ZZN0 to %struct0 addrspace(3)*), i32 0, i32 0, i32 %add5, i32 0 + store i16 %scratch0.load, i16 addrspace(3)* %addr1, align 2 + ret void +} + Index: llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GCN %s +... 
+--- +name: tied_operand_test +tracksRegLiveness: true +stack: + - { id: 0, type: default, offset: 0, size: 2, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: 0, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + +machineFunctionInfo: + isEntryFunction: true + stackPtrOffsetReg: '$sgpr32' + +body: | + bb.0.entry: + liveins: $sgpr0_sgpr1 + + ; GCN-LABEL: name: tied_operand_test + ; GCN: liveins: $sgpr0_sgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec + ; GCN-NEXT: renamable $vgpr0 = SCRATCH_LOAD_SHORT_D16_HI_ST 4, 0, killed renamable $vgpr0, implicit $exec, implicit $flat_scr + ; GCN-NEXT: renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr0_sgpr1, 4, 0 + ; GCN-NEXT: renamable $sgpr0 = S_LSHL_B32 killed renamable $sgpr0, 1, implicit-def dead $scc + ; GCN-NEXT: renamable $vgpr1 = COPY killed renamable $sgpr0, implicit $exec + ; GCN-NEXT: DS_WRITE_B32_gfx9 killed renamable $vgpr1, killed renamable $vgpr0, 8, 0, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec + renamable $vgpr0 = SCRATCH_LOAD_SHORT_D16_HI_SADDR %stack.0, 0, 0, killed renamable $vgpr0, implicit $exec, implicit $flat_scr + renamable $sgpr0 = S_LOAD_DWORD_IMM killed renamable $sgpr0_sgpr1, 4, 0 + renamable $sgpr0 = S_LSHL_B32 killed renamable $sgpr0, 1, implicit-def dead $scc + renamable $vgpr1 = COPY killed renamable $sgpr0, implicit $exec + DS_WRITE_B32_gfx9 killed renamable $vgpr1, killed renamable $vgpr0, 8, 0, implicit $exec + S_ENDPGM 0 + +...