Index: lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -82,7 +82,7 @@ void PostprocessISelDAG() override; private: - SDValue foldFrameIndex(SDValue N) const; + std::pair foldFrameIndex(SDValue N) const; bool isNoNanSrc(SDValue N) const; bool isInlineImmediate(const SDNode *N) const; bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs, @@ -116,9 +116,11 @@ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &SLC) const; - bool SelectMUBUFScratchOffen(SDValue Addr, SDValue &RSrc, SDValue &VAddr, + bool SelectMUBUFScratchOffen(SDNode *Root, + SDValue Addr, SDValue &RSrc, SDValue &VAddr, SDValue &SOffset, SDValue &ImmOffset) const; - bool SelectMUBUFScratchOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, + bool SelectMUBUFScratchOffset(SDNode *Root, + SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset, @@ -1077,13 +1079,33 @@ return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE); } -SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { - if (auto FI = dyn_cast(N)) - return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); - return N; +static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) { + auto PSV = PtrInfo.V.dyn_cast(); + return PSV && PSV->isStack(); } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDValue Addr, SDValue &Rsrc, +std::pair AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { + const MachineFunction &MF = CurDAG->getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo(); + + if (auto FI = dyn_cast(N)) { + SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), + FI->getValueType(0)); + + // If we can resolve this to a frame index access, this is relative to the + // frame pointer SGPR. + return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(), + MVT::i32)); + } + + // If we don't know this private access is a local stack object, it needs to + // be relative to the entry point's scratch wave offset register. + return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(), + MVT::i32)); +} + +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Root, + SDValue Addr, SDValue &Rsrc, SDValue &VAddr, SDValue &SOffset, SDValue &ImmOffset) const { @@ -1092,7 +1114,6 @@ const SIMachineFunctionInfo *Info = MF.getInfo(); Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); - SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); if (ConstantSDNode *CAddr = dyn_cast(Addr)) { unsigned Imm = CAddr->getZExtValue(); @@ -1103,6 +1124,14 @@ MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits); VAddr = SDValue(MovHighBits, 0); + + // In a call sequence, stores to the argument stack area are relative to the + // stack pointer. + const MachinePointerInfo &PtrInfo = cast(Root)->getPointerInfo(); + unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? + Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); + + SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16); return true; } @@ -1116,19 +1145,20 @@ // Offsets in vaddr must be positive. ConstantSDNode *C1 = cast(N1); if (isLegalMUBUFImmOffset(C1)) { - VAddr = foldFrameIndex(N0); + std::tie(VAddr, SOffset) = foldFrameIndex(N0); ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); return true; } } // (node) - VAddr = foldFrameIndex(Addr); + std::tie(VAddr, SOffset) = foldFrameIndex(Addr); ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16); return true; } -bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDValue Addr, +bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Root, + SDValue Addr, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset) const { @@ -1141,7 +1171,15 @@ const SIMachineFunctionInfo *Info = MF.getInfo(); SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); - SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); + + const MachinePointerInfo &PtrInfo = cast(Root)->getPointerInfo(); + unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ? + Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg(); + + // FIXME: Get from MachinePointerInfo? We should only be using the frame + // offset if we know this is in a call sequence. + SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32); + Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16); return true; } Index: lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- lib/Target/AMDGPU/BUFInstructions.td +++ lib/Target/AMDGPU/BUFInstructions.td @@ -11,8 +11,8 @@ def MUBUFAddr64 : ComplexPattern; def MUBUFAddr64Atomic : ComplexPattern; -def MUBUFScratchOffen : ComplexPattern; -def MUBUFScratchOffset : ComplexPattern; +def MUBUFScratchOffen : ComplexPattern; +def MUBUFScratchOffset : ComplexPattern; def MUBUFOffset : ComplexPattern; def MUBUFOffsetNoGLC : ComplexPattern;