Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -983,14 +983,6 @@
   return true;
 }
 
-static bool isLegalMUBUFImmOffset(unsigned Imm) {
-  return isUInt<12>(Imm);
-}
-
-static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) {
-  return isLegalMUBUFImmOffset(Imm->getZExtValue());
-}
-
 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                      SDValue &VAddr, SDValue &SOffset,
                                      SDValue &Offset, SDValue &Offen,
@@ -1032,7 +1024,7 @@
       Ptr = N0;
     }
 
-    if (isLegalMUBUFImmOffset(C1)) {
+    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
       Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
       return true;
     }
@@ -1142,7 +1134,7 @@
   if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
     unsigned Imm = CAddr->getZExtValue();
 
-    assert(!isLegalMUBUFImmOffset(Imm) &&
+    assert(!SIInstrInfo::isLegalMUBUFImmOffset(Imm) &&
            "should have been selected by other pattern");
 
     SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
@@ -1169,7 +1161,7 @@
     // Offsets in vaddr must be positive.
 
     ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
-    if (isLegalMUBUFImmOffset(C1)) {
+    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
       std::tie(VAddr, SOffset) = foldFrameIndex(N0);
       ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
       return true;
@@ -1188,7 +1180,7 @@
                                           SDValue &SOffset,
                                           SDValue &Offset) const {
   ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
-  if (!CAddr || !isLegalMUBUFImmOffset(CAddr))
+  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
     return false;
 
   SDLoc DL(Addr);
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
@@ -860,6 +860,10 @@
 
   static bool isKillTerminator(unsigned Opcode);
   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
+
+  static bool isLegalMUBUFImmOffset(unsigned Imm) {
+    return isUInt<12>(Imm);
+  }
 };
 
 namespace AMDGPU {
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3712,13 +3712,43 @@
 
     case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR: {
       unsigned VDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      const MachineOperand *VAddr = getNamedOperand(Inst, AMDGPU::OpName::soff);
+      auto Add = MRI.getUniqueVRegDef(VAddr->getReg());
+      unsigned Offset = 0;
+
+      // See if we can extract an immediate offset by recognizing one of these:
+      //   V_ADD_I32_e32 dst, imm, src1
+      //   V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
+      // V_ADD will be removed by "Remove dead machine instructions".
+      if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
+        const MachineOperand *Src =
+            getNamedOperand(*Add, AMDGPU::OpName::src0);
+
+        if (Src && Src->isReg()) {
+          auto Mov = MRI.getUniqueVRegDef(Src->getReg());
+          if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
+            Src = &Mov->getOperand(1);
+        }
+
+        if (Src) {
+          if (Src->isImm())
+            Offset = Src->getImm();
+          else if (Src->isCImm())
+            Offset = Src->getCImm()->getZExtValue();
+        }
+
+        if (Offset && isLegalMUBUFImmOffset(Offset))
+          VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
+        else
+          Offset = 0;
+      }
 
       BuildMI(*MBB, Inst, Inst.getDebugLoc(),
               get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
-          .add(*getNamedOperand(Inst, AMDGPU::OpName::soff)) // vaddr
+          .add(*VAddr) // vaddr
           .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
           .addImm(0) // soffset
-          .addImm(0) // offset
+          .addImm(Offset) // offset
           .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
           .addImm(0) // slc
           .addImm(0) // tfe
Index: llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/smrd.ll
@@ -191,6 +191,27 @@
   ret float %r
 }
 
+; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
+; GCN-NEXT: BB#
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
+define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
+main_body:
+  %off = add i32 %offset, 4095
+  %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
+  ret float %r
+}
+
+; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large:
+; GCN-NEXT: BB#
+; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ;
+define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 {
+main_body:
+  %off = add i32 %offset, 4096
+  %r = call float @llvm.SI.load.const.v4i32(<4 x i32> %desc, i32 %off)
+  ret float %r
+}
+
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1