diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -71,9 +71,6 @@
 def gi_flat_offset :
     GIComplexOperandMatcher<s64, "selectFlatOffset">,
     GIComplexPatternEquiv<FLATOffset>;
-def gi_flat_offset_signed :
-    GIComplexOperandMatcher<s64, "selectFlatOffsetSigned">,
-    GIComplexPatternEquiv<FLATOffsetSigned>;
 def gi_global_saddr :
     GIComplexOperandMatcher<s64, "selectGlobalSAddr">,
     GIComplexPatternEquiv<GlobalSAddr>;
@@ -86,7 +83,7 @@
     GIComplexPatternEquiv;
 
 def gi_flat_scratch_offset :
-    GIComplexOperandMatcher<s32, "selectFlatOffsetSigned">,
+    GIComplexOperandMatcher<s32, "selectFlatOffset">,
     GIComplexPatternEquiv<ScratchOffset>;
 
 def gi_flat_scratch_saddr :
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -201,7 +201,6 @@
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset) const;
-  template <bool IsSigned>
   bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                         SDValue &Offset) const;
   bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
@@ -1649,7 +1648,6 @@
   llvm_unreachable("cannot find MemSDNode in the pattern!");
 }
 
-template <bool IsSigned>
 bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
                                           SDValue Addr,
                                           SDValue &VAddr,
@@ -1666,7 +1664,7 @@
     uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
 
     const SIInstrInfo *TII = Subtarget->getInstrInfo();
-    if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
+    if (TII->isLegalFLATOffset(COffsetVal, AS)) {
       Addr = N0;
       OffsetVal = COffsetVal;
     } else {
@@ -1683,8 +1681,8 @@
         SDLoc DL(N);
         uint64_t RemainderOffset;
 
-        std::tie(OffsetVal, RemainderOffset)
-          = TII->splitFlatOffset(COffsetVal, AS, IsSigned);
+        std::tie(OffsetVal, RemainderOffset) =
+            TII->splitFlatOffset(COffsetVal, AS);
 
         SDValue AddOffsetLo =
             getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
@@ -1766,7 +1764,7 @@
     int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
     const SIInstrInfo *TII = Subtarget->getInstrInfo();
 
-    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS)) {
       Addr = LHS;
       ImmOffset = COffsetVal;
     } else if (!LHS->isDivergent() && COffsetVal > 0) {
@@ -1774,8 +1772,8 @@
       // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset) +
      //                         (large_offset & MaxOffset);
       int64_t SplitImmOffset, RemainderOffset;
-      std::tie(SplitImmOffset, RemainderOffset)
-        = TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, true);
+      std::tie(SplitImmOffset, RemainderOffset) =
+          TII->splitFlatOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS);
 
       if (isUInt<32>(RemainderOffset)) {
         SDNode *VMov = CurDAG->getMachineNode(
@@ -1864,14 +1862,15 @@
 
   const SIInstrInfo *TII = Subtarget->getInstrInfo();
 
-  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS)) {
+    // TODO Common up
     const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(*Subtarget, true);
     // Use signed division by a power of two to truncate towards 0.
     int64_t D = 1LL << (NumBits - 1);
     int64_t RemainderOffset = (COffsetVal / D) * D;
     int64_t ImmField = COffsetVal - RemainderOffset;
 
-    assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS, true));
+    assert(TII->isLegalFLATOffset(ImmField, AMDGPUAS::PRIVATE_ADDRESS));
     assert(RemainderOffset + ImmField == COffsetVal);
 
     COffsetVal = ImmField;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -191,14 +191,11 @@
   InstructionSelector::ComplexRendererFns
   selectSmrdSgpr(MachineOperand &Root) const;
 
-  template <bool Signed>
   std::pair<Register, int64_t>
   selectFlatOffsetImpl(MachineOperand &Root) const;
 
   InstructionSelector::ComplexRendererFns
   selectFlatOffset(MachineOperand &Root) const;
-  InstructionSelector::ComplexRendererFns
-  selectFlatOffsetSigned(MachineOperand &Root) const;
 
   InstructionSelector::ComplexRendererFns
   selectGlobalSAddr(MachineOperand &Root) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2981,7 +2981,7 @@
   // FIXME: This is only needed because tablegen requires number of dst operands
   // in match and replace pattern to be the same. Otherwise patterns can be
   // exported from SDag path.
-  auto Addr = selectFlatOffsetImpl<true>(MI.getOperand(2));
+  auto Addr = selectFlatOffsetImpl(MI.getOperand(2));
 
   Register Data = MI.getOperand(3).getReg();
   const unsigned Opc = MRI->getType(Data).isVector() ?
@@ -3395,7 +3395,6 @@
   }};
 }
 
-template <bool Signed>
 std::pair<Register, int64_t>
 AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root) const {
   MachineInstr *MI = Root.getParent();
@@ -3413,7 +3412,7 @@
     return Default;
 
   unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
-  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, Signed))
+  if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace))
     return Default;
 
   return std::make_pair(PtrBase, ConstOffset);
@@ -3421,17 +3420,7 @@
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectFlatOffset(MachineOperand &Root) const {
-  auto PtrWithOffset = selectFlatOffsetImpl<false>(Root);
-
-  return {{
-      [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
-      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
-    }};
-}
-
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectFlatOffsetSigned(MachineOperand &Root) const {
-  auto PtrWithOffset = selectFlatOffsetImpl<true>(Root);
+  auto PtrWithOffset = selectFlatOffsetImpl(Root);
 
   return {{
       [=](MachineInstrBuilder &MIB) { MIB.addReg(PtrWithOffset.first); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(PtrWithOffset.second); },
@@ -3470,7 +3459,7 @@
   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
 
   if (ConstOffset != 0) {
-    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, true)) {
+    if (TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS)) {
       Addr = PtrBase;
       ImmOffset = ConstOffset;
     } else if (ConstOffset > 0) {
@@ -3484,8 +3473,8 @@
       // saddr + large_offset -> saddr + (voffset = large_offset & ~MaxOffset)
       //                                 + (large_offset & MaxOffset);
       int64_t SplitImmOffset, RemainderOffset;
-      std::tie(SplitImmOffset, RemainderOffset)
-        = TII.splitFlatOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS, true);
+      std::tie(SplitImmOffset, RemainderOffset) =
+          TII.splitFlatOffset(ConstOffset, AMDGPUAS::GLOBAL_ADDRESS);
 
       if (isUInt<32>(RemainderOffset)) {
         MachineInstr *MI = Root.getParent();
@@ -3577,7 +3566,7 @@
   std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
 
   if (ConstOffset != 0 &&
-      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+      TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS)) {
     Addr = PtrBase;
     ImmOffset = ConstOffset;
   }
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -6,9 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-def FLATOffset : ComplexPattern<i64, 2, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
-def FLATOffsetSigned : ComplexPattern<i64, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
-def ScratchOffset : ComplexPattern<i32, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
+def FLATOffset : ComplexPattern<i64, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
+def ScratchOffset : ComplexPattern<i32, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
 
 def GlobalSAddr : ComplexPattern<i64, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
 def ScratchSAddr : ComplexPattern<i32, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
@@ -450,7 +449,7 @@
     (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
     " $vdst, $vaddr, $vdata, off$offset$cpol",
     [(set vt:$vdst,
-      (atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
+      (atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
     GlobalSaddrTable<0, opName#"_rtn">,
     AtomicNoRet <opName, 1> {
     let has_saddr = 1;
@@ -827,7 +826,7 @@
 >;
 
 class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
+  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
   (inst $vaddr, $offset, 0, $in)
 >;
@@ -837,7 +836,7 @@
 >;
 
 class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset))),
+  (vt (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset))),
   (inst $vaddr, $offset)
 >;
@@ -876,7 +875,7 @@
 >;
 
 class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset)),
+  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset)),
   (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;
@@ -907,13 +906,13 @@
 >;
 
 class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
-  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
+  (node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
   (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
 >;
 
 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, ValueType data_vt = vt> : GCNPat <
-  (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
+  (vt (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data)),
   (inst $vaddr, $data, $offset)
 >;
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -130,7 +130,7 @@
       MFI.getObjectAlign(FI));
 
   if (ST.enableFlatScratch()) {
-    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS)) {
       BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
         .addReg(SpillReg, RegState::Kill)
         .addReg(SPReg)
@@ -239,7 +239,7 @@
       MFI.getObjectAlign(FI));
 
   if (ST.enableFlatScratch()) {
-    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
+    if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS)) {
       BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR),
               SpillReg)
         .addReg(SPReg)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1238,16 +1238,14 @@
   return AM.Scale == 0 &&
          (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
-                                  AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS,
-                                  /*Signed=*/false));
+                                  AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS));
 }
 
 bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
   if (Subtarget->hasFlatGlobalInsts())
     return AM.Scale == 0 &&
            (AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
-                                    AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
-                                    /*Signed=*/true));
+                                    AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS));
 
   if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
     // Assume the we will use FLAT for all global memory accesses
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1067,16 +1067,13 @@
   }
 
   /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
-  /// encoded instruction. If \p Signed, this is for an instruction that
-  /// interprets the offset as signed.
-  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
-                         bool Signed) const;
+  /// encoded instruction.
+  bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace) const;
 
   /// Split \p COffsetVal into {immediate offset field, remainder offset}
   /// values.
   std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
-                                              unsigned AddrSpace,
-                                              bool IsSigned) const;
+                                              unsigned AddrSpace) const;
 
   /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
   /// Return -1 if the target-specific opcode for the pseudo instruction does
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7299,8 +7299,7 @@
   return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
 }
 
-bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
-                                    bool Signed) const {
+bool SIInstrInfo::isLegalFLATOffset(int64_t Offset, unsigned AddrSpace) const {
   // TODO: Should 0 be special cased?
   if (!ST.hasFlatInstOffsets())
     return false;
@@ -7308,17 +7307,19 @@
   if (ST.hasFlatSegmentOffsetBug() && AddrSpace == AMDGPUAS::FLAT_ADDRESS)
     return false;
 
+  bool Signed = AddrSpace != AMDGPUAS::FLAT_ADDRESS;
+
   unsigned N = AMDGPU::getNumFlatOffsetBits(ST, Signed);
   return Signed ? isIntN(N, Offset) : isUIntN(N, Offset);
 }
 
-std::pair<int64_t, int64_t> SIInstrInfo::splitFlatOffset(int64_t COffsetVal,
-                                                         unsigned AddrSpace,
-                                                         bool IsSigned) const {
+std::pair<int64_t, int64_t>
+SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace) const {
   int64_t RemainderOffset = COffsetVal;
   int64_t ImmField = 0;
-  const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST, IsSigned);
-  if (IsSigned) {
+  bool Signed = AddrSpace != AMDGPUAS::FLAT_ADDRESS;
+  const unsigned NumBits = AMDGPU::getNumFlatOffsetBits(ST, Signed);
+  if (Signed) {
     // Use signed division by a power of two to truncate towards 0.
     int64_t D = 1LL << (NumBits - 1);
     RemainderOffset = (COffsetVal / D) * D;
@@ -7328,7 +7329,7 @@
     RemainderOffset = COffsetVal - ImmField;
   }
 
-  assert(isLegalFLATOffset(ImmField, AddrSpace, IsSigned));
+  assert(isLegalFLATOffset(ImmField, AddrSpace));
   assert(RemainderOffset + ImmField == COffsetVal);
   return {ImmField, RemainderOffset};
 }
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -435,7 +435,7 @@
     return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
 
   const SIInstrInfo *TII = ST.getInstrInfo();
-  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS, true);
+  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS);
 }
 
 Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
@@ -518,7 +518,7 @@
   assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
 
   if (IsFlat) {
-    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, true) &&
+    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS) &&
            "offset should be legal");
     FIOp->ChangeToRegister(BaseReg, false);
     OffsetOp->setImm(NewOffset);
@@ -549,7 +549,7 @@
     return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
 
   const SIInstrInfo *TII = ST.getInstrInfo();
-  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS, true);
+  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS);
 }
 
 const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
@@ -840,9 +840,9 @@
   assert((IsFlat || ((Offset % EltSize) == 0)) &&
          "unexpected VGPR spill offset");
 
-  bool IsOffsetLegal = IsFlat
-      ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS, true)
-      : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
+  bool IsOffsetLegal =
+      IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS)
+             : SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset);
   if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
     SOffset = MCRegister();
@@ -1521,8 +1521,7 @@
         MachineOperand *OffsetOp =
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
         int64_t NewOffset = Offset + OffsetOp->getImm();
-        if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
-                                   true)) {
+        if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS)) {
          OffsetOp->setImm(NewOffset);
          if (FrameReg)
            return;
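For readers skimming the patch, the rule it centralizes in SIInstrInfo is: the FLAT immediate offset is treated as signed for every address space except plain FLAT, and an oversized constant offset is split into a legal immediate plus a remainder using a truncate-towards-zero division. The sketch below is a standalone illustration of that arithmetic, not the LLVM sources themselves; the AddrSpace enum, helper names, and the 12/13-bit field widths are assumptions chosen only for this example.

// Standalone sketch (assumed names and bit widths), mirroring the patch's
// "signedness follows the address space" rule and offset-splitting arithmetic.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <utility>

enum class AddrSpace { Flat, Global, Private };

// Assumed example widths of the immediate offset field.
static unsigned numOffsetBits(bool Signed) { return Signed ? 13 : 12; }

// Everything except plain FLAT interprets the offset as signed.
static bool isSignedOffset(AddrSpace AS) { return AS != AddrSpace::Flat; }

static bool isLegalOffset(int64_t Offset, AddrSpace AS) {
  unsigned N = numOffsetBits(isSignedOffset(AS));
  if (isSignedOffset(AS))
    return Offset >= -(int64_t(1) << (N - 1)) && Offset < (int64_t(1) << (N - 1));
  return Offset >= 0 && Offset < (int64_t(1) << N);
}

// Split COffsetVal into {immediate field, remainder}: the two parts add back
// up to the original value and the immediate part is always legal for AS.
static std::pair<int64_t, int64_t> splitOffset(int64_t COffsetVal, AddrSpace AS) {
  int64_t RemainderOffset = COffsetVal;
  int64_t ImmField = 0;
  const unsigned NumBits = numOffsetBits(isSignedOffset(AS));
  if (isSignedOffset(AS)) {
    // Signed division by a power of two truncates towards zero, so the
    // immediate keeps the sign of the original offset and stays in range.
    int64_t D = int64_t(1) << (NumBits - 1);
    RemainderOffset = (COffsetVal / D) * D;
    ImmField = COffsetVal - RemainderOffset;
  } else if (COffsetVal >= 0) {
    // Unsigned field: peel off the low bits.
    ImmField = COffsetVal & ((int64_t(1) << NumBits) - 1);
    RemainderOffset = COffsetVal - ImmField;
  }
  assert(isLegalOffset(ImmField, AS));
  assert(RemainderOffset + ImmField == COffsetVal);
  return {ImmField, RemainderOffset};
}

int main() {
  for (int64_t Off : {5000, -5000, 100}) {
    auto [Imm, Rem] = splitOffset(Off, AddrSpace::Global);
    std::printf("global offset %lld -> imm %lld + remainder %lld\n",
                (long long)Off, (long long)Imm, (long long)Rem);
  }
}

Because the two SIInstrInfo entry points can now derive the signedness from the address space they are already given, every caller drops the redundant boolean, which is why the rest of the patch is largely mechanical.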