Index: lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- lib/Target/AMDGPU/AMDGPUGISel.td +++ lib/Target/AMDGPU/AMDGPUGISel.td @@ -64,6 +64,18 @@ GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_ds_1addr_1offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + +// Separate load nodes are defined to glue m0 initialization in +// SelectionDAG. The GISel selector can just insert m0 initialization +// directly before before selecting a glue-less load, so hide this +// distinction. +def : GINodeEquiv; + + class GISelSop2Pat < SDPatternOperator node, Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.h =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -90,7 +90,7 @@ void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI, SmallVectorImpl &AddrInfo) const; bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const; - bool selectG_LOAD(MachineInstr &I) const; + bool selectG_LOAD(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool selectG_SELECT(MachineInstr &I) const; bool selectG_STORE(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; @@ -133,6 +133,13 @@ InstructionSelector::ComplexRendererFns selectMUBUFScratchOffset(MachineOperand &Root) const; + bool isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, unsigned OffsetBits) const; + + InstructionSelector::ComplexRendererFns + selectDS1Addr1Offset(MachineOperand &Root) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; Index: lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1236,10 +1236,22 @@ return false; } -bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { - // TODO: Can/should we insert m0 initialization here for DS instructions and - // call the normal selector? - return false; +bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I, + CodeGenCoverage &CoverageInfo) const { + MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); + unsigned AS = PtrTy.getAddressSpace(); + if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) && + STI.ldsRequiresM0Init()) { + // If DS instructions require M0 initializtion, insert it before selecting. + BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addImm(-1); + } + + return selectImpl(I, CoverageInfo); } bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { @@ -1357,7 +1369,7 @@ return true; return selectImpl(I, CoverageInfo); case TargetOpcode::G_LOAD: - return selectImpl(I, CoverageInfo); + return selectG_LOAD(I, CoverageInfo); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: @@ -1696,6 +1708,22 @@ }}}; } +bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, + unsigned OffsetBits) const { + if ((OffsetBits == 16 && !isUInt<16>(Offset)) || + (OffsetBits == 8 && !isUInt<8>(Offset))) + return false; + + if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled()) + return true; + + // On Southern Islands instruction with a negative base value and an offset + // don't seem to work. + return signBitIsZero(Base, MRI); +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectMUBUFScratchOffset( MachineOperand &Root) const { @@ -1724,3 +1752,49 @@ [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset }}; } + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; + } + + int64_t ConstAddr = 0; + if (isBaseWithConstantOffset(Root, MRI)) { + const MachineOperand &LHS = RootDef->getOperand(1); + const MachineOperand &RHS = RootDef->getOperand(2); + const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); + const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t PossibleOffset = + RHSDef->getOperand(1).getCImm()->getSExtValue(); + if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) { + // (add n0, c0) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); } + }}; + } + } + } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { + + + + } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) { + + + } + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; +} Index: lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstructions.td +++ lib/Target/AMDGPU/AMDGPUInstructions.td @@ -480,11 +480,13 @@ def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { let IsLoad = 1; + let IsNonExtLoad = 1; let MinAlignment = 8; } def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { let IsLoad = 1; + let IsNonExtLoad = 1; let MinAlignment = 16; } Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -328,13 +328,13 @@ >; def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> { - let IsUnindexed = 1; let IsLoad = 1; + let IsUnindexed = 1; } def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> { - let IsNonExtLoad = 1; let IsLoad = 1; + let IsNonExtLoad = 1; } def atomic_load_32_glue : PatFrag<(ops node:$ptr), @@ -396,7 +396,9 @@ let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { -def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)>; +def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { + let IsNonExtLoad = 1; +} let MemoryVT = i8 in { def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; @@ -412,9 +414,11 @@ def load_align8_local_m0 : LoadFrag , LocalAddress { let MinAlignment = 8; + let IsNonExtLoad = 1; } def load_align16_local_m0 : LoadFrag , LocalAddress { let MinAlignment = 16; + let IsNonExtLoad = 1; } } // End IsLoad = 1 Index: test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -0,0 +1,894 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + + +--- + +name: load_local_s32_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-LABEL: name: load_local_s32_from_4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX9-LABEL: name: load_local_s32_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_local_s32_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_2 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX7-LABEL: name: load_local_s32_from_2 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX9-LABEL: name: load_local_s32_from_2 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_local_s32_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_local_v2s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX6-LABEL: name: load_local_v2s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-LABEL: name: load_local_v2s32 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX9-LABEL: name: load_local_v2s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_v2s32_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX6-LABEL: name: load_local_v2s32_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-LABEL: name: load_local_v2s32_align4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-LABEL: name: load_local_v2s32_align4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_v3s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v3s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX7-LABEL: name: load_local_v3s32 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-LABEL: name: load_local_v3s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_local_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v4s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX7-LABEL: name: load_local_v4s32 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-LABEL: name: load_local_v4s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_local_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7-LABEL: name: load_local_s64 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_local_s64 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_s64_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s64_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7-LABEL: name: load_local_s64_align4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_local_s64_align4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_v2s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2s64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX7-LABEL: name: load_local_v2s64 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-LABEL: name: load_local_v2s64 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_local_v2p1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2p1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-LABEL: name: load_local_v2p1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-LABEL: name: load_local_v2p1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_local_s96 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s96 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-LABEL: name: load_local_s96 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-LABEL: name: load_local_s96 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... + +--- + +name: load_local_s128 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s128 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-LABEL: name: load_local_s128 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-LABEL: name: load_local_s128 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +--- + +name: load_local_p3_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p3_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-LABEL: name: load_local_p3_from_4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_local_p3_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_local_p5_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p5_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-LABEL: name: load_local_p5_from_4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_local_p5_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_local_p1_align8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p1_align8 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-LABEL: name: load_local_p1_align8 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_local_p1_align8 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_p1_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p1_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-LABEL: name: load_local_p1_align4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_local_p1_align4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_p999_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p999_from_8 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-LABEL: name: load_local_p999_from_8 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-LABEL: name: load_local_p999_from_8 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_v2p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2p3 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-LABEL: name: load_local_v2p3 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-LABEL: name: load_local_v2p3 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +--- + +name: load_local_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-LABEL: name: load_local_v2s16 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_local_v2s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... + +--- + +name: load_local_v4s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v4s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-LABEL: name: load_local_v4s16 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_local_v4s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +# --- + +# name: load_local_v6s16 +# legalized: true +# regBankSelected: true +# tracksRegLiveness: true +# machineFunctionInfo: +# scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 +# scratchWaveOffsetReg: $sgpr4 +# stackPtrOffsetReg: $sgpr32 + +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:vgpr(p3) = COPY $vgpr0 +# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 3) +# $vgpr0_vgpr1_vgpr2 = COPY %1 + +# ... + +--- + +name: load_local_v8s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v8s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX7-LABEL: name: load_local_v8s16 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9-LABEL: name: load_local_v8s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... + +################################################################################ +### Stress addressing modes +################################################################################ + +--- + +name: load_local_s32_from_1_gep_65535 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 65535 + %2:vgpr(p3) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %3 + +... + +--- + +name: load_local_s32_from_1_gep_65536 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 65536 + %2:vgpr(p3) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %3 + +... + +--- + +name: load_local_s32_from_1_gep_m1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -1 + %2:vgpr(p3) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %3 + +...