diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -229,7 +229,7 @@
 def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
   "HasFlatSegmentOffsetBug",
   "true",
-  "GFX10 bug, inst_offset ignored in flat segment"
+  "GFX10 bug where inst_offset is ignored when flat instructions access global memory"
 >;

 def FeatureOffset3fBug : SubtargetFeature<"offset-3f-bug",
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -70,10 +70,10 @@
 def gi_flat_offset :
     GIComplexOperandMatcher<s64, "selectFlatOffset">,
-    GIComplexPatternEquiv<FLATOffset>;
-def gi_flat_offset_signed :
-    GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
-    GIComplexPatternEquiv<FLATOffsetSigned>;
+    GIComplexPatternEquiv<FlatOffset>;
+def gi_global_offset :
+    GIComplexOperandMatcher<s64, "selectGlobalOffset">,
+    GIComplexPatternEquiv<GlobalOffset>;

 def gi_global_saddr :
     GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
     GIComplexPatternEquiv<GlobalSAddr>;
@@ -86,7 +86,7 @@
     GIComplexPatternEquiv<MUBUFScratchOffen>;

 def gi_flat_scratch_offset :
-    GIComplexOperandMatcher<s32, "selectFlatOffsetSigned">,
+    GIComplexOperandMatcher<s32, "selectScratchOffset">,
     GIComplexPatternEquiv<ScratchOffset>;

 def gi_flat_scratch_saddr :
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -201,9 +201,15 @@
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset) const;

-  template <bool IsSigned>
+  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
+                            SDValue &Offset,
+                            AMDGPU::FlatInstVariant Variant) const;
   bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset) const;
+  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                          SDValue &Offset) const;
+  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+                           SDValue &Offset) const;
   bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                          SDValue &VOffset, SDValue &Offset) const;
   bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
@@ -1649,24 +1655,25 @@
   llvm_unreachable("cannot find MemSDNode in the pattern!");
 }

-template <bool IsSigned>
-bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
-                                          SDValue Addr,
-                                          SDValue &VAddr,
-                                          SDValue &Offset) const {
+bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
+    SDNode *N, SDValue Addr, SDValue &VAddr, SDValue &Offset,
+    AMDGPU::FlatInstVariant Variant) const {
   int64_t OffsetVal = 0;

   unsigned AS = findMemSDNode(N)->getAddressSpace();

-  if (Subtarget->hasFlatInstOffsets() &&
-      (!Subtarget->hasFlatSegmentOffsetBug() ||
-       AS != AMDGPUAS::FLAT_ADDRESS)) {
+  bool CanHaveFlatSegmentOffsetBug =
+      Subtarget->hasFlatSegmentOffsetBug() &&
+      Variant == AMDGPU::FlatInstVariant::Flat &&
+      (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);
+
+  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
     SDValue N0, N1;
     if (isBaseWithConstantOffset64(Addr, N0, N1)) {
       uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

       const SIInstrInfo *TII = Subtarget->getInstrInfo();
-      if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
+      if (TII->isLegalFLATOffset(COffsetVal, AS, Variant)) {
         Addr = N0;
         OffsetVal = COffsetVal;
       } else {
@@ -1683,8 +1690,8 @@
         SDLoc DL(N);
         uint64_t RemainderOffset;

-        std::tie(OffsetVal, RemainderOffset)
-          = TII->splitFlatOffset(COffsetVal, AS, IsSigned);
+        std::tie(OffsetVal, RemainderOffset) =
+            TII->splitFlatOffset(COffsetVal, AS, Variant);

         SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
@@ -1741,6 +1748,27 @@
   return true;
 }

+bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
+                                          SDValue &VAddr,
+                                          SDValue &Offset) const {
+  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
+                              AMDGPU::FlatInstVariant::Flat);
+}
+
+bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
+                                            SDValue &VAddr,
+                                            SDValue &Offset) const {
+  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
+                              AMDGPU::FlatInstVariant::Global);
+}
+
+bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
+                                             SDValue &VAddr,
+                                             SDValue &Offset) const {
+  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
+                              AMDGPU::FlatInstVariant::Scratch);
+}
+
 // If this matches zero_extend i32:x, return x
 static SDValue matchZExtFromI32(SDValue Op) {
   if (Op.getOpcode() != ISD::ZERO_EXTEND)
@@ -1766,7 +1794,8 @@
     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
     const SIInstrInfo *TII = Subtarget->getInstrInfo();

-    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
+                               AMDGPU::FlatInstVariant::Global)) {
       Addr = LHS;
       ImmOffset = COffsetVal;
     } else if (!LHS->isDivergent() && COffsetVal > 0) {
@@ -1774,8 +1803,9 @@
       // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) +
       //                         (large_offset & MaxOffset);
       int64_t SplitImmOffset, RemainderOffset;
-      std::tie(SplitImmOffset, RemainderOffset)
-        = TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true);
+      std::tie(SplitImmOffset, RemainderOffset) =
+          TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
+                               AMDGPU::FlatInstVariant::Global);

       if (isUInt<32>(RemainderOffset)) {
         SDNode *VMov = CurDAG->getMachineNode(
@@ -1864,14 +1894,16 @@

   const SIInstrInfo *TII = Subtarget->getInstrInfo();

-  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
+                              AMDGPU::FlatInstVariant::Scratch)) {
     const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(*Subtarget, true);
     // Use signed division by a power of two to truncate towards 0.
    int64_t D = 1LL << (NumBits - 1);
     int64_t RemainderOffset = (COffsetVal / D) * D;
     int64_t ImmField = COffsetVal - RemainderOffset;

-    assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true));
+    assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS,
+                                  AMDGPU::FlatInstVariant::Scratch));
     assert(RemainderOffset + ImmField == COffsetVal);

     COffsetVal = ImmField;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H

+#include "SIDefines.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
 #include "llvm/CodeGen/Register.h"
 #include "llvm/IR/InstrTypes.h"
@@ -191,14 +192,16 @@
   InstructionSelector::ComplexRendererFns
   selectSmrdSgpr(MachineOperand &Root) const;

-  template <bool Signed>
   std::pair<Register, int64_t>
-  selectFlatOffsetImpl(MachineOperand &Root) const;
+  selectFlatOffsetImpl(MachineOperand &Root,
+                       AMDGPU::FlatInstVariant Variant) const;

   InstructionSelector::ComplexRendererFns
   selectFlatOffset(MachineOperand &Root) const;
   InstructionSelector::ComplexRendererFns
-  selectFlatOffsetSigned(MachineOperand &Root) const;
+  selectGlobalOffset(MachineOperand &Root) const;
+  InstructionSelector::ComplexRendererFns
+  selectScratchOffset(MachineOperand &Root) const;

   InstructionSelector::ComplexRendererFns
   selectGlobalSAddr(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2981,7 +2981,8 @@
   // FIXME: This is only needed because tablegen requires number of dst operands
   // in match and replace pattern to be the same. Otherwise patterns can be
   // exported from SDag path.
-  auto Addr = selectFlatOffsetImpl<true>(MI.getOperand(2));
+  auto Addr =
+      selectFlatOffsetImpl(MI.getOperand(2), AMDGPU::FlatInstVariant::Global);

   Register Data = MI.getOperand(3).getReg();
   const unsigned Opc = MRI->getType(Data).isVector() ?
@@ -3395,9 +3396,8 @@
   }};
 }

-template <bool Signed>
-std::pair<Register, int64_t>
-AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
+std::pair<Register, int64_t> AMDGPUInstructionSelector::selectFlatOffsetImpl(
+    MachineOperand &Root, AMDGPU::FlatInstVariant Variant) const {
   MachineInstr *MI = Root.getParent();

   auto Default = std::make_pair(Root.getReg(), 0);
@@ -3413,7 +3413,7 @@
     return Default;

   unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
-  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, Signed))
+  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, Variant))
     return Default;

   return std::make_pair(PtrBase, ConstOffset);
@@ -3421,7 +3421,8 @@

 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
-  auto PtrWithOffset = selectFlatOffsetImpl<false>(Root);
+  auto PtrWithOffset =
+      selectFlatOffsetImpl(Root, AMDGPU::FlatInstVariant::Flat);

   return {{
       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
@@ -3430,8 +3431,20 @@
 }

 InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
-  auto PtrWithOffset = selectFlatOffsetImpl<true>(Root);
+AMDGPUInstructionSelector::selectGlobalOffset(MachineOperand &Root) const {
+  auto PtrWithOffset =
+      selectFlatOffsetImpl(Root, AMDGPU::FlatInstVariant::Global);
+
+  return {{
+      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
+      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
+    }};
+}
+
+InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectScratchOffset(MachineOperand &Root) const {
+  auto PtrWithOffset =
+      selectFlatOffsetImpl(Root, AMDGPU::FlatInstVariant::Scratch);

   return {{
       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
@@ -3470,7 +3483,8 @@
   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

   if (ConstOffset != 0) {
-    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
+                              AMDGPU::FlatInstVariant::Global)) {
       Addr = PtrBase;
       ImmOffset = ConstOffset;
     } else if (ConstOffset > 0) {
@@ -3484,8 +3498,9 @@
       // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset)
       //                         + (large_offset & MaxOffset);
       int64_t SplitImmOffset, RemainderOffset;
-      std::tie(SplitImmOffset, RemainderOffset)
-        = TII.splitFlatOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, true);
+      std::tie(SplitImmOffset, RemainderOffset) =
+          TII.splitFlatOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS,
+                              AMDGPU::FlatInstVariant::Global);

       if (isUInt<32>(RemainderOffset)) {
         MachineInstr *MI = Root.getParent();
@@ -3577,7 +3592,8 @@
   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

   if (ConstOffset != 0 &&
-      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                            AMDGPU::FlatInstVariant::Scratch)) {
     Addr = PtrBase;
     ImmOffset = ConstOffset;
   }
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -6,9 +6,9 @@
 //
 //===----------------------------------------------------------------------===//

-def FLATOffset : ComplexPattern<i64, 2, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
-def FLATOffsetSigned : ComplexPattern<i64, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
-def ScratchOffset : ComplexPattern<i32, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def FlatOffset : ComplexPattern<i64, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
+def GlobalOffset : ComplexPattern<i64, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
+def ScratchOffset : ComplexPattern<i32, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

 def GlobalSAddr : ComplexPattern<i64, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
 def ScratchSAddr : ComplexPattern<i32, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
@@ -392,7 +392,7 @@
     (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
     " $vdst, $vaddr, $vdata$offset$cpol",
     [(set vt:$vdst,
-      (atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
+      (atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
       GlobalSaddrTable<0, opName#"_rtn">,
       AtomicNoRet <opName, 1> {
     let FPAtomic = isFP;
@@ -450,7 +450,7 @@
     (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
     " $vdst, $vaddr, $vdata, off$offset$cpol",
     [(set vt:$vdst,
-      (atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
+      (atomic (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
      GlobalSaddrTable<0, opName#"_rtn">,
      AtomicNoRet <opName, 1> {
     let has_saddr = 1;
@@ -817,17 +817,17 @@

 // Patterns for global loads with no offset.
 class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (vt (node (FLATOffset i64:$vaddr, i16:$offset))),
+  (vt (node (FlatOffset i64:$vaddr, i16:$offset))),
   (inst $vaddr, $offset)
 >;

 class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
+  (node (FlatOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
   (inst $vaddr, $offset, 0, $in)
 >;

 class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
+  (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
   (inst $vaddr, $offset, 0, $in)
 >;

@@ -837,7 +837,7 @@
 >;

 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset))),
+  (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset))),
   (inst $vaddr, $offset)
 >;

@@ -871,19 +871,19 @@
 >;

 class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset)),
+  (node vt:$data, (FlatOffset i64:$vaddr, i16:$offset)),
   (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;

 class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset)),
+  (node vt:$data, (GlobalOffset i64:$vaddr, i16:$offset)),
   (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;

 class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
-  (node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
+  (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
   (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;

@@ -891,29 +891,29 @@
                                 ValueType vt, ValueType data_vt = vt> : GCNPat <
   // atomic store follows atomic binop convention so the address comes
   // first.
-  (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data),
+  (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data),
   (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;

 class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                      ValueType data_vt = vt> : GCNPat <
-  (vt (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+  (vt (node (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
   (inst $vaddr, $data, $offset)
 >;

 class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
+  (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
   (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;

 class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
+  (node (GlobalOffset i64:$vaddr, i16:$offset), vt:$data),
   (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;

 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                            ValueType data_vt = vt> : GCNPat <
-  (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
+  (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
   (inst $vaddr, $data, $offset)
 >;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -764,6 +764,9 @@

 } // namespace VOP3PEncoding

+// The variants of flat instructions
+enum class FlatInstVariant : uint8_t { Flat, Global, Scratch };
+
 } // namespace AMDGPU

 #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -130,7 +130,8 @@
       MFI.getObjectAlign(FI));

   if (ST.enableFlatScratch()) {
-    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS,
+                               AMDGPU::FlatInstVariant::Scratch)) {
       BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
           .addReg(SpillReg, RegState::Kill)
           .addReg(SPReg)
@@ -239,7 +240,8 @@
       MFI.getObjectAlign(FI));

   if (ST.enableFlatScratch()) {
-    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS,
+                               AMDGPU::FlatInstVariant::Scratch)) {
       BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR),
               SpillReg)
           .addReg(SPReg)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1239,7 +1239,7 @@
   return AM.Scale == 0 &&
          (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
                                   AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS,
-                                  /*Signed=*/false));
+                                  AMDGPU::FlatInstVariant::Flat));
 }

 bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
@@ -1247,7 +1247,7 @@
     return AM.Scale == 0 &&
            (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
                                     AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
-                                    /*Signed=*/true));
+                                    AMDGPU::FlatInstVariant::Global));

   if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
     // Assume the we will use FLAT for all global memory accesses
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1070,13 +1070,13 @@
-  /// encoded instruction. If \p Signed, this is for an instruction that
-  /// interprets the offset as signed.
+  /// encoded instruction for the given flat instruction \p Variant,
+  /// which determines whether the offset is interpreted as signed.
   bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
-                         bool Signed) const;
+                         AMDGPU::FlatInstVariant Variant) const;

   /// Split \p COffsetVal into {immediate offset field, remainder offset}
   /// values.
-  std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
-                                              unsigned AddrSpace,
-                                              bool IsSigned) const;
+  std::pair<int64_t, int64_t>
+  splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
+                  AMDGPU::FlatInstVariant Variant) const;

   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
   /// Return -1 if the target-specific opcode for the pseudo instruction does
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7300,25 +7300,30 @@
 }

 bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
-                                    bool Signed) const {
+                                    AMDGPU::FlatInstVariant Variant) const {
   // TODO: Should 0 be special cased?
   if (!ST.hasFlatInstOffsets())
     return false;

-  if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
+  if (ST.hasFlatSegmentOffsetBug() &&
+      Variant == AMDGPU::FlatInstVariant::Flat &&
+      (AddrSpace == AMDGPUAS::FLAT_ADDRESS ||
+       AddrSpace == AMDGPUAS::GLOBAL_ADDRESS))
     return false;

+  bool Signed = Variant != AMDGPU::FlatInstVariant::Flat;
   unsigned N = AMDGPU::getNumFlatOffsetBits(ST, Signed);
   return Signed ? isIntN(N, Offset) : isUIntN(N, Offset);
 }

-std::pair<int64_t, int64_t> SIInstrInfo::splitFlatOffset(int64_t COffsetVal,
-                                                         unsigned AddrSpace,
-                                                         bool IsSigned) const {
+std::pair<int64_t, int64_t>
+SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
+                             AMDGPU::FlatInstVariant Variant) const {
   int64_t RemainderOffset = COffsetVal;
   int64_t ImmField = 0;

-  const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST, IsSigned);
-  if (IsSigned) {
+  bool Signed = Variant != AMDGPU::FlatInstVariant::Flat;
+  const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST, Signed);
+  if (Signed) {
     // Use signed division by a power of two to truncate towards 0.
    int64_t D = 1LL << (NumBits - 1);
     RemainderOffset = (COffsetVal / D) * D;
@@ -7328,7 +7333,7 @@
     RemainderOffset = COffsetVal - ImmField;
   }

-  assert(isLegalFLATOffset(ImmField, AddrSpace, IsSigned));
+  assert(isLegalFLATOffset(ImmField, AddrSpace, Variant));
   assert(RemainderOffset + ImmField == COffsetVal);
   return {ImmField, RemainderOffset};
 }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -435,7 +435,8 @@
     return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);

   const SIInstrInfo *TII = ST.getInstrInfo();
-  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS, true);
+  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                                 AMDGPU::FlatInstVariant::Scratch);
 }

 Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
@@ -518,7 +519,8 @@
   assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));

   if (IsFlat) {
-    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, true) &&
+    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                                  AMDGPU::FlatInstVariant::Scratch) &&
            "offset should be legal");
     FIOp->ChangeToRegister(BaseReg, false);
     OffsetOp->setImm(NewOffset);
@@ -549,7 +551,8 @@
     return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);

   const SIInstrInfo *TII = ST.getInstrInfo();
-  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, true);
+  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                                AMDGPU::FlatInstVariant::Scratch);
 }

 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
@@ -840,9 +843,10 @@
   assert((IsFlat || ((Offset % EltSize) == 0)) &&
          "unexpected VGPR spill offset");

-  bool IsOffsetLegal = IsFlat
-      ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS, true)
-      : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
+  bool IsOffsetLegal =
+      IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                                      AMDGPU::FlatInstVariant::Scratch)
+             : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
   if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
     SOffset = MCRegister();
@@ -1522,7 +1526,7 @@
           TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
       int64_t NewOffset = Offset + OffsetOp->getImm();
       if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
-                                 true)) {
+                                 AMDGPU::FlatInstVariant::Scratch)) {
         OffsetOp->setImm(NewOffset);
         if (FrameReg)
           return;
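
For reference, a minimal standalone sketch of the signed offset split that SIInstrInfo::splitFlatOffset performs above for the Global and Scratch variants. NumBits = 13 is an illustrative assumption (a GFX9-style 13-bit signed offset field) and splitSignedOffset is a hypothetical helper name; the patch itself obtains the real field width from AMDGPU::getNumFlatOffsetBits(ST, Signed).

#include <cassert>
#include <cstdint>
#include <utility>

// Split COffsetVal into {immediate field, remainder}, mirroring the signed
// branch of splitFlatOffset. NumBits = 13 is an assumed field width.
static std::pair<int64_t, int64_t> splitSignedOffset(int64_t COffsetVal,
                                                     unsigned NumBits = 13) {
  // Signed division by a power of two truncates towards zero, so ImmField
  // keeps the sign of COffsetVal and fits in a NumBits-wide signed field.
  int64_t D = 1LL << (NumBits - 1);
  int64_t RemainderOffset = (COffsetVal / D) * D;
  int64_t ImmField = COffsetVal - RemainderOffset;
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}

int main() {
  // 0x12345 does not fit in a 13-bit signed immediate: it splits into the
  // legal immediate 0x345 plus the remainder 0x12000, which the selectors
  // above materialize into the address register.
  auto [Imm, Rem] = splitSignedOffset(0x12345);
  assert(Imm == 0x345 && Rem == 0x12000);
  return 0;
}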