diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -88,6 +88,10 @@
     GIComplexOperandMatcher<s64, "selectSmrdSgpr">,
     GIComplexPatternEquiv<SMRDSgpr>;
 
+def gi_smrd_sgpr_imm :
+    GIComplexOperandMatcher<s64, "selectSmrdSgprImm">,
+    GIComplexPatternEquiv<SMRDSgprImm>;
+
 def gi_flat_offset :
     GIComplexOperandMatcher<s64, "selectFlatOffset">,
    GIComplexPatternEquiv<FlatOffset>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -193,14 +193,18 @@
   bool SelectScratchSVAddr(SDNode *N, SDValue Addr, SDValue &VAddr,
                            SDValue &SAddr, SDValue &Offset) const;
-  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool Imm,
-                        bool Imm32Only) const;
+  bool SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode, SDValue *SOffset,
+                        SDValue *Offset, bool Imm32Only = false) const;
   SDValue Expand32BitAddress(SDValue Addr) const;
-  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, bool Imm,
-                  bool Imm32Only = false) const;
+  bool SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+                            SDValue *Offset, bool Imm32Only = false) const;
+  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue *SOffset,
+                  SDValue *Offset, bool Imm32Only = false) const;
   bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
   bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
-  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &SOffset) const;
+  bool SelectSMRDSgprImm(SDValue Addr, SDValue &SBase, SDValue &SOffset,
+                         SDValue &Offset) const;
   bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
   bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
   bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1886,21 +1886,21 @@
-// Match an immediate (if Imm is true) or an SGPR (if Imm is false)
-// offset. If Imm32Only is true, match only 32-bit immediate offsets
-// available on CI.
+// Match an immediate (if Offset is not null) or an SGPR (if SOffset is
+// not null) offset. If Imm32Only is true, match only 32-bit immediate
+// offsets available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
-                                          SDValue &Offset, bool Imm,
+bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue Addr, SDValue ByteOffsetNode,
+                                          SDValue *SOffset, SDValue *Offset,
                                           bool Imm32Only) const {
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
   if (!C) {
-    if (Imm)
+    if (!SOffset)
       return false;
     if (ByteOffsetNode.getValueType().isScalarInteger() &&
         ByteOffsetNode.getValueType().getSizeInBits() == 32) {
-      Offset = ByteOffsetNode;
+      *SOffset = ByteOffsetNode;
       return true;
     }
     if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
       if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
-        Offset = ByteOffsetNode.getOperand(0);
+        *SOffset = ByteOffsetNode.getOperand(0);
         return true;
       }
     }
@@ -1912,8 +1912,8 @@
   int64_t ByteOffset = C->getSExtValue();
   Optional<int64_t> EncodedOffset =
       AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
-  if (EncodedOffset && Imm && !Imm32Only) {
-    Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+  if (EncodedOffset && Offset && !Imm32Only) {
+    *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
     return true;
   }
@@ -1922,17 +1922,17 @@
     return false;
 
   EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
-  if (EncodedOffset && Imm32Only) {
-    Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
+  if (EncodedOffset && Offset && Imm32Only) {
+    *Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
     return true;
   }
 
   if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
     return false;
 
-  if (!Imm) {
+  if (SOffset) {
     SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
-    Offset = SDValue(
+    *SOffset = SDValue(
         CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
     return true;
   }
@@ -1968,11 +1968,18 @@
-// Match a base and an immediate (if Imm is true) or an SGPR
-// (if Imm is false) offset. If Imm32Only is true, match only 32-bit
-// immediate offsets available on CI.
+// Match a base and an immediate (if Offset is not null) or an SGPR
+// (if SOffset is not null) offset. If Imm32Only is true, match only
+// 32-bit immediate offsets available on CI.
-bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
-                                    SDValue &Offset, bool Imm,
-                                    bool Imm32Only) const {
+bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(SDValue Addr, SDValue &SBase,
+                                              SDValue *SOffset, SDValue *Offset,
+                                              bool Imm32Only) const {
   SDLoc SL(Addr);
 
+  if (SOffset && Offset) {
+    assert(!Imm32Only);
+    SDValue B;
+    return SelectSMRDBaseOffset(Addr, B, nullptr, Offset) &&
+           SelectSMRDBaseOffset(B, SBase, SOffset, nullptr);
+  }
+
   // A 32-bit (address + offset) should not cause unsigned 32-bit integer
   // wraparound, because s_load instructions perform the addition in 64 bits.
   if ((Addr.getValueType() != MVT::i32 ||
@@ -1987,38 +1994,55 @@
       assert(N0 && N1 && isa<ConstantSDNode>(N1));
     }
     if (N0 && N1) {
-      if (SelectSMRDOffset(N1, Offset, Imm, Imm32Only)) {
-        SBase = Expand32BitAddress(N0);
+      if (SelectSMRDOffset(N0, N1, SOffset, Offset, Imm32Only)) {
+        SBase = N0;
         return true;
       }
-      if (SelectSMRDOffset(N0, Offset, Imm, Imm32Only)) {
-        SBase = Expand32BitAddress(N1);
+      if (SelectSMRDOffset(N1, N0, SOffset, Offset, Imm32Only)) {
+        SBase = N1;
         return true;
       }
     }
     return false;
   }
 
-  if (!Imm)
+  if (Offset && !SOffset) {
+    SBase = Addr;
+    *Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+    return true;
+  }
+  return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
+                                    SDValue *SOffset, SDValue *Offset,
+                                    bool Imm32Only) const {
+  if (!SelectSMRDBaseOffset(Addr, SBase, SOffset, Offset, Imm32Only))
     return false;
 
-  SBase = Expand32BitAddress(Addr);
-  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
+  SBase = Expand32BitAddress(SBase);
   return true;
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
-  return SelectSMRD(Addr, SBase, Offset, /* Imm */ true);
+  return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset);
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                          SDValue &Offset) const {
   assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS);
-  return SelectSMRD(Addr, SBase, Offset, /* Imm */ true, /* Imm32Only */ true);
+  return SelectSMRD(Addr, SBase, /* SOffset */ nullptr, &Offset,
+                    /* Imm32Only */ true);
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
-                                        SDValue &Offset) const {
-  return SelectSMRD(Addr, SBase, Offset, /* Imm */ false);
+                                        SDValue &SOffset) const {
+  return SelectSMRD(Addr, SBase, &SOffset, /* Offset */ nullptr);
+}
+
+bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(SDValue Addr, SDValue &SBase,
+                                           SDValue &SOffset,
+                                           SDValue &Offset) const {
+  return SelectSMRD(Addr, SBase, &SOffset, &Offset);
 }
 
 bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -63,11 +63,9 @@
 
 private:
   struct GEPInfo {
-    const MachineInstr &GEP;
     SmallVector<unsigned, 2> SgprParts;
     SmallVector<unsigned, 2> VgprParts;
-    int64_t Imm;
-    GEPInfo(const MachineInstr &GEP) : GEP(GEP), Imm(0) { }
+    int64_t Imm = 0;
   };
 
   bool isSGPR(Register Reg) const;
@@ -200,12 +198,16 @@
   InstructionSelector::ComplexRendererFns
   selectVINTERPModsHi(MachineOperand &Root) const;
 
+  bool selectSmrdOffset(MachineOperand &Root, Register &Base, Register *SOffset,
+                        int64_t *Offset) const;
   InstructionSelector::ComplexRendererFns
   selectSmrdImm(MachineOperand &Root) const;
   InstructionSelector::ComplexRendererFns
   selectSmrdImm32(MachineOperand &Root) const;
   InstructionSelector::ComplexRendererFns
   selectSmrdSgpr(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectSmrdSgprImm(MachineOperand &Root) const;
 
   std::pair<Register, int64_t>
   selectFlatOffsetImpl(MachineOperand &Root, uint64_t FlatVariant) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2361,7 +2361,7 @@
   if (PtrMI->getOpcode() != TargetOpcode::G_PTR_ADD)
     return;
 
-  GEPInfo GEPInfo(*PtrMI);
+  GEPInfo GEPInfo;
 
   for (unsigned i = 1; i != 3; ++i) {
     const MachineOperand &GEPOp = PtrMI->getOperand(i);
@@ -3800,25 +3800,82 @@
   }};
 }
 
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+bool AMDGPUInstructionSelector::selectSmrdOffset(MachineOperand &Root,
+                                                 Register &Base,
+                                                 Register *SOffset,
+                                                 int64_t *Offset) const {
+  MachineInstr *MI = Root.getParent();
+  MachineBasicBlock *MBB = MI->getParent();
+
+  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
+  // then we can select all ptr + 32-bit offsets.
   SmallVector<GEPInfo, 4> AddrInfo;
-  getAddrModeInfo(*Root.getParent(), *MRI, AddrInfo);
+  getAddrModeInfo(*MI, *MRI, AddrInfo);
 
-  if (AddrInfo.empty() || AddrInfo[0].SgprParts.size() != 1)
-    return None;
+  if (AddrInfo.empty())
+    return false;
 
-  const GEPInfo &GEPInfo = AddrInfo[0];
+  const GEPInfo &GEPI = AddrInfo[0];
   Optional<int64_t> EncodedImm =
-      AMDGPU::getSMRDEncodedOffset(STI, GEPInfo.Imm, false);
-  if (!EncodedImm)
+      AMDGPU::getSMRDEncodedOffset(STI, GEPI.Imm, false);
+
+  if (SOffset && Offset) {
+    if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
+        AddrInfo.size() > 1) {
+      const GEPInfo &GEPI2 = AddrInfo[1];
+      if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
+        if (Register OffsetReg =
+                matchZeroExtendFromS32(*MRI, GEPI2.SgprParts[1])) {
+          Base = GEPI2.SgprParts[0];
+          *SOffset = OffsetReg;
+          *Offset = *EncodedImm;
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  if (Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
+    Base = GEPI.SgprParts[0];
+    *Offset = *EncodedImm;
+    return true;
+  }
+
+  // SGPR offset is unsigned.
+  if (SOffset && GEPI.SgprParts.size() == 1 && isUInt<32>(GEPI.Imm) &&
+      GEPI.Imm != 0) {
+    // If we make it this far we have a load with a 32-bit immediate offset.
+    // It is OK to select this using a sgpr offset, because we have already
+    // failed trying to select this load into one of the _IMM variants since
+    // the _IMM Patterns are considered before the _SGPR patterns.
+    Base = GEPI.SgprParts[0];
+    *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
+        .addImm(GEPI.Imm);
+    return true;
+  }
+
+  if (SOffset && GEPI.SgprParts.size() == 2 && GEPI.Imm == 0) {
+    if (Register OffsetReg = matchZeroExtendFromS32(*MRI, GEPI.SgprParts[1])) {
+      Base = GEPI.SgprParts[0];
+      *SOffset = OffsetReg;
+      return true;
+    }
+  }
+
+  return false;
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdImm(MachineOperand &Root) const {
+  Register Base;
+  int64_t Offset;
+  if (!selectSmrdOffset(Root, Base, /* SOffset= */ nullptr, &Offset))
     return None;
 
-  unsigned PtrReg = GEPInfo.SgprParts[0];
-  return {{
-    [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
-    [=](MachineInstrBuilder &MIB) { MIB.addImm(*EncodedImm); }
-  }};
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
 }
 
 InstructionSelector::ComplexRendererFns
@@ -3844,43 +3901,24 @@
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectSmrdSgpr(MachineOperand &Root) const {
-  MachineInstr *MI = Root.getParent();
-  MachineBasicBlock *MBB = MI->getParent();
-
-  SmallVector<GEPInfo, 4> AddrInfo;
-  getAddrModeInfo(*MI, *MRI, AddrInfo);
-
-  // FIXME: We should shrink the GEP if the offset is known to be <= 32-bits,
-  // then we can select all ptr + 32-bit offsets.
-  if (AddrInfo.empty())
+  Register Base, SOffset;
+  if (!selectSmrdOffset(Root, Base, &SOffset, /* Offset= */ nullptr))
     return None;
 
-  const GEPInfo &GEPInfo = AddrInfo[0];
-  Register PtrReg = GEPInfo.SgprParts[0];
-
-  // SGPR offset is unsigned.
-  if (AddrInfo[0].SgprParts.size() == 1 && isUInt<32>(GEPInfo.Imm) &&
-      GEPInfo.Imm != 0) {
-    // If we make it this far we have a load with an 32-bit immediate offset.
-    // It is OK to select this using a sgpr offset, because we have already
-    // failed trying to select this load into one of the _IMM variants since
-    // the _IMM Patterns are considered before the _SGPR patterns.
-    Register OffsetReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
-    BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), OffsetReg)
-        .addImm(GEPInfo.Imm);
-    return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
-             [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
-  }
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); }}};
+}
 
-  if (AddrInfo[0].SgprParts.size() == 2 && GEPInfo.Imm == 0) {
-    if (Register OffsetReg =
-            matchZeroExtendFromS32(*MRI, GEPInfo.SgprParts[1])) {
-      return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(PtrReg); },
-               [=](MachineInstrBuilder &MIB) { MIB.addReg(OffsetReg); }}};
-    }
-  }
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectSmrdSgprImm(MachineOperand &Root) const {
+  Register Base, SOffset;
+  int64_t Offset;
+  if (!selectSmrdOffset(Root, Base, &SOffset, &Offset))
+    return None;
 
-  return None;
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Base); },
+           [=](MachineInstrBuilder &MIB) { MIB.addReg(SOffset); },
+           [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }}};
 }
 
 std::pair<Register, int64_t>
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -220,16 +220,23 @@
         AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::sbase) == -1)
       return false;
 
-    assert(getNumOperandsNoGlue(Load0) == getNumOperandsNoGlue(Load1));
+    unsigned NumOps = getNumOperandsNoGlue(Load0);
+    if (NumOps != getNumOperandsNoGlue(Load1))
+      return false;
 
     // Check base reg.
    if (Load0->getOperand(0) != Load1->getOperand(0))
       return false;
 
+    // Match register offsets, if both register and immediate offsets are present.
+    assert(NumOps == 4 || NumOps == 5);
+    if (NumOps == 5 && Load0->getOperand(1) != Load1->getOperand(1))
+      return false;
+
     const ConstantSDNode *Load0Offset =
-        dyn_cast<ConstantSDNode>(Load0->getOperand(1));
+        dyn_cast<ConstantSDNode>(Load0->getOperand(NumOps - 3));
     const ConstantSDNode *Load1Offset =
-        dyn_cast<ConstantSDNode>(Load1->getOperand(1));
+        dyn_cast<ConstantSDNode>(Load1->getOperand(NumOps - 3));
 
     if (!Load0Offset || !Load1Offset)
       return false;
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -883,6 +883,7 @@
 def SMRDImm         : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
 def SMRDImm32       : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
 def SMRDSgpr        : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
+def SMRDSgprImm     : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
 def SMRDBufferImm   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
 def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
@@ -903,11 +904,18 @@
   // 3. SGPR offset
   def : GCNPat <
-    (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
-    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+    (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))
   >;
 
-  // 4. No offset
+  // 4. SGPR+IMM offset
+  def : GCNPat <
+    (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
+    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
+    let OtherPredicates = [isGFX9Plus];
+  }
+
+  // 5. No offset
   def : GCNPat <
     (vt (smrd_load (i64 SReg_64:$sbase))),
     (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -1,12 +1,14 @@
 # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
 # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
 # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,GFX9
 
 --- |
   define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
   define amdgpu_kernel void @smrd_wide() { ret void }
   define amdgpu_kernel void @constant_address_positive() { ret void }
   define amdgpu_kernel void @smrd_sgpr() { ret void }
+  define amdgpu_kernel void @smrd_sgpr_imm() { ret void }
 ...
 
 ---
@@ -232,3 +234,28 @@
     %5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
     S_ENDPGM 0, implicit %5
 ...
+
+---
+
+# Test a load with a (register + immediate) offset.
+# GCN-LABEL: name: smrd_sgpr_imm{{$}}
+# GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1
+# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
+# GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,
+
+name: smrd_sgpr_imm
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr2
+    %0:sgpr(p4) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = COPY $sgpr2
+    %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
+    %4:sgpr(p4) = G_PTR_ADD %0, %2
+    %5:sgpr(s64) = G_CONSTANT i64 16
+    %6:sgpr(p4) = G_PTR_ADD %4, %5
+    %7:sgpr(s32) = G_LOAD %6 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
+    S_ENDPGM 0, implicit %7
+...
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -global-isel=0 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,SDAG %s
-; RUN: llc -march=amdgcn -global-isel=1 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,GISEL %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,SDAG %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefixes=GCN,GISEL %s
 
 @0 = external dso_local addrspace(4) constant [4 x <2 x float>]
 @1 = external dso_local addrspace(4) constant i32
@@ -49,6 +49,42 @@
   ret void
 }
 
+; GCN-LABEL: name: test_sgpr_plus_imm_offset
+; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0
+; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1
+; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2
+; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
+; SDAG: S_LOAD_DWORD_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16,
+; GISEL: S_LOAD_DWORD_SGPR_IMM killed renamable $sgpr0_sgpr1, killed renamable $sgpr2, 16,
+define amdgpu_ps void @test_sgpr_plus_imm_offset(i8 addrspace(4)* inreg %base, i32 inreg %offset,
+                                                 i32 addrspace(1)* inreg %out) {
+  %v1 = getelementptr i8, i8 addrspace(4)* %base, i64 16
+  %v2 = zext i32 %offset to i64
+  %v3 = getelementptr i8, i8 addrspace(4)* %v1, i64 %v2
+  %v4 = bitcast i8 addrspace(4)* %v3 to i32 addrspace(4)*
+  %v5 = load i32, i32 addrspace(4)* %v4, align 4
+  store i32 %v5, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: name: test_sgpr_plus_imm_offset_x2
+; SDAG-DAG: %[[BASE0:.*]]:sgpr_32 = COPY $sgpr0
+; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1
+; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2
+; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
+; SDAG: S_LOAD_DWORDX2_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16,
+; GISEL: S_LOAD_DWORDX2_SGPR_IMM killed renamable $sgpr0_sgpr1, killed renamable $sgpr2, 16,
+define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(i8 addrspace(4)* inreg %base, i32 inreg %offset,
+                                                    <2 x i32> addrspace(1)* inreg %out) {
+  %v1 = getelementptr i8, i8 addrspace(4)* %base, i64 16
+  %v2 = zext i32 %offset to i64
+  %v3 = getelementptr i8, i8 addrspace(4)* %v1, i64 %v2
+  %v4 = bitcast i8 addrspace(4)* %v3 to <2 x i32> addrspace(4)*
+  %v5 = load <2 x i32>, <2 x i32> addrspace(4)* %v4, align 4
+  store <2 x i32> %v5, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
 declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #1
 
 ; Function Attrs: nounwind readnone speculatable