diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -113,14 +113,6 @@
     GIComplexOperandMatcher<s64, "selectMUBUFOffset">,
     GIComplexPatternEquiv<MUBUFOffset>;

-def gi_mubuf_addr64_atomic :
-    GIComplexOperandMatcher<s64, "selectMUBUFAddr64Atomic">,
-    GIComplexPatternEquiv<MUBUFAddr64Atomic>;
-
-def gi_mubuf_offset_atomic :
-    GIComplexOperandMatcher<s64, "selectMUBUFOffsetAtomic">,
-    GIComplexPatternEquiv<MUBUFOffsetAtomic>;
-
 def gi_smrd_buffer_imm :
     GIComplexOperandMatcher<s64, "selectSMRDBufferImm">,
     GIComplexPatternEquiv<SMRDBufferImm>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -188,11 +188,7 @@
                           SDValue &Offset1, unsigned Size) const;
   bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                    SDValue &SOffset, SDValue &Offset, SDValue &Offen,
-                   SDValue &Idxen, SDValue &Addr64, SDValue &CPol, SDValue &TFE,
-                   SDValue &SWZ) const;
-  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
-                         SDValue &SOffset, SDValue &Offset, SDValue &CPol,
-                         SDValue &TFE, SDValue &SWZ) const;
+                   SDValue &Idxen, SDValue &Addr64) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &SOffset, SDValue &Offset) const;
   bool SelectMUBUFScratchOffen(SDNode *Parent,
@@ -202,9 +198,6 @@
                                SDValue Addr, SDValue &SRsrc,
                                SDValue &Soffset, SDValue &Offset) const;

-  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
-                         SDValue &Offset, SDValue &CPol, SDValue &TFE,
-                         SDValue &SWZ) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                          SDValue &Offset) const;

@@ -1390,8 +1383,7 @@
 bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                      SDValue &SOffset, SDValue &Offset,
                                      SDValue &Offen, SDValue &Idxen,
-                                     SDValue &Addr64, SDValue &CPol,
-                                     SDValue &TFE, SDValue &SWZ) const {
+                                     SDValue &Addr64) const {
   // Subtarget prefers to use flat instruction
   // FIXME: This should be a pattern predicate and not reach here
   if (Subtarget->useFlatForGlobal())
@@ -1399,11 +1391,6 @@

   SDLoc DL(Addr);

-  if (!CPol)
-    CPol = CurDAG->getTargetConstant(0, DL, MVT::i32);
-  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
-  SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
-
   Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
   Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
@@ -1480,8 +1467,7 @@

 bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                            SDValue &VAddr, SDValue &SOffset,
-                                           SDValue &Offset, SDValue &CPol,
-                                           SDValue &TFE, SDValue &SWZ) const {
+                                           SDValue &Offset) const {
   SDValue Ptr, Offen, Idxen, Addr64;

   // addr64 bit was removed for volcanic islands.
@@ -1489,8 +1475,7 @@
   if (!Subtarget->hasAddr64())
     return false;

-  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   CPol, TFE, SWZ))
+  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
     return false;

   ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
@@ -1507,14 +1492,6 @@
   return false;
 }

-bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &VAddr, SDValue &SOffset,
-                                           SDValue &Offset) const {
-  SDValue CPol, TFE, SWZ;
-
-  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, CPol, TFE, SWZ);
-}
-
 static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
   auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
   return PSV && PSV->isStack();
@@ -1633,15 +1610,13 @@
 }

 bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &SOffset, SDValue &Offset,
-                                           SDValue &CPol, SDValue &TFE,
-                                           SDValue &SWZ) const {
+                                           SDValue &SOffset, SDValue &Offset
+                                           ) const {
   SDValue Ptr, VAddr, Offen, Idxen, Addr64;
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

-  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
-                   CPol, TFE, SWZ))
+  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64))
     return false;

   if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
@@ -1660,14 +1635,6 @@
   return false;
 }

-bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
-                                           SDValue &Soffset, SDValue &Offset
-                                           ) const {
-  SDValue CPol, TFE, SWZ;
-
-  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, CPol, TFE, SWZ);
-}
-
 // Find a load or store from corresponding pattern root.
 // Roots may be build_vector, bitconvert or their combinations.
 static MemSDNode* findMemSDNode(SDNode *N) {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -6,15 +6,12 @@
 //
 //===----------------------------------------------------------------------===//

-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFAddr64 : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
+def MUBUFOffset : ComplexPattern<i64, 3, "SelectMUBUFOffset">;

 def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
 def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;

-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
-def MUBUFOffsetAtomic : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
-
 def BUFAddrKind {
   int Offset = 0;
   int OffEn  = 1;
@@ -402,19 +399,19 @@
   RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
   dag InsNoData = !if(!empty(vaddrList),
     (ins                    SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, CPol:$cpol),
+         offset:$offset, CPol_0:$cpol),
     (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
-         offset:$offset, CPol:$cpol)
+         offset:$offset, CPol_0:$cpol)
   );
   dag InsData = !if(!empty(vaddrList),
     (ins vdata_op:$vdata,                    SReg_128:$srsrc,
-         SCSrc_b32:$soffset, offset:$offset, CPol:$cpol),
+         SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol),
     (ins vdata_op:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
-         SCSrc_b32:$soffset, offset:$offset, CPol:$cpol)
+         SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol)
   );
   dag ret = !con(
               !if(!empty(vdataList), InsNoData, InsData),
-              !if(isLds, (ins SWZ:$swz), (ins TFE:$tfe, SWZ:$swz))
+              !if(isLds, (ins SWZ_0:$swz), (ins TFE_0:$tfe, SWZ_0:$swz))
              );
 }

@@ -506,15 +503,15 @@
 }

 class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
-  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
-  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
+  (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
+  (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset))
 >;

 class MUBUF_Addr64_Load_Pat <Instruction inst,
                             ValueType load_vt = i32,
                             SDPatternOperator ld = null_frag> : Pat <
-  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
-  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))
+  (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
+  (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset))
 >;

 multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
@@ -585,12 +582,12 @@

   def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, store_vt,
     [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                       i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
+                                       i16:$offset))]>,
     MUBUFAddr64Table<0, NAME>;

   def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, store_vt,
     [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                       i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))]>,
+                                       i16:$offset))]>,
     MUBUFAddr64Table<1, NAME>;

   def _OFFEN  : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, store_vt>;
@@ -757,14 +754,14 @@
   let FPAtomic = isFP in
   def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
     [(set vdataType:$vdata,
-     (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset),
+     (atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset),
              vdataType:$vdata_in))]>,
     MUBUFAddr64Table <0, NAME # "_RTN">;

   let FPAtomic = isFP in
   def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
     [(set vdataType:$vdata,
-     (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+     (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
              vdataType:$vdata_in))]>,
     MUBUFAddr64Table <1, NAME # "_RTN">;

@@ -1539,20 +1536,20 @@
 class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
                               PatFrag constant_ld> : GCNPat <
     (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
-                                  i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
-    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
+                                  i16:$offset))),
+    (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
   >;

 multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
                                      ValueType vt, PatFrag atomic_ld> {
   def : GCNPat <
      (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
-     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+     (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
   >;

   def : GCNPat <
-    (vt (atomic_ld (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset))),
-    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (vt (atomic_ld (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset))),
+    (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset))
   >;
 }

@@ -1572,9 +1569,8 @@
                               PatFrag ld> {

   def : GCNPat <
-    (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-             i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz))),
-    (Instr_OFFSET $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
+    (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
+    (Instr_OFFSET $srsrc, $soffset, $offset)
   >;
 }
@@ -1612,12 +1608,12 @@
                                     ValueType vt, PatFrag ld_frag> {
   def : GCNPat <
     (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
-    (InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, $in)
+    (InstrOffen $vaddr, $srsrc, $soffset, $offset, $in)
   >;

   def : GCNPat <
     (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
-    (InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, $in)
+    (InstrOffset $srsrc, $soffset, $offset, $in)
   >;
 }

@@ -1663,12 +1659,12 @@
   // Store follows atomic op convention so address is first
   def : GCNPat <
     (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+    (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset)
   >;

   def : GCNPat <
-    (atomic_st (MUBUFOffsetAtomic v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
-    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0)
+    (atomic_st (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+    (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset))
   >;
 }
 let SubtargetPredicate = isGFX6GFX7 in {
@@ -1681,9 +1677,8 @@
                               PatFrag st> {

   def : GCNPat <
-    (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
-                                i16:$offset, CPol:$cpol, i1:$tfe, i1:$swz)),
-    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, CPol:$cpol, $tfe, $swz)
+    (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset)),
+    (Instr_OFFSET $vdata, $srsrc, $soffset, $offset)
   >;
 }

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1116,7 +1116,9 @@
 def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;

 def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
+def TFE_0 : NamedOperandBit_0<"TFE", NamedMatchClass<"TFE">>;
 def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
+def SWZ_0 : NamedOperandBit_0<"SWZ", NamedMatchClass<"SWZ">>;
 def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
 def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
 def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
@@ -1,14 +1,17 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89,SIVI %s
 ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s

 ; GCN-LABEL: {{^}}extract_vector_elt_v2i16:
 ; GCN: s_load_dword [[VEC:s[0-9]+]]
-; GCN: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
-; GCN-DAG: v_mov_b32_e32 [[VELT0:v[0-9]+]], [[VEC]]
-; GCN-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
-; GCN-DAG: buffer_store_short [[VELT0]]
-; GCN-DAG: buffer_store_short [[VELT1]]
+; SIVI: s_lshr_b32 [[ELT1:s[0-9]+]], [[VEC]], 16
+; SIVI-DAG: v_mov_b32_e32 [[VELT0:v[0-9]+]], [[VEC]]
+; SIVI-DAG: v_mov_b32_e32 [[VELT1:v[0-9]+]], [[ELT1]]
+; SIVI-DAG: buffer_store_short [[VELT0]]
+; SIVI-DAG: buffer_store_short [[VELT1]]
+; GFX9: v_mov_b32_e32 [[VVEC:v[0-9]+]], [[VEC]]
+; GFX9: global_store_short_d16_hi v{{[0-9]+}}, [[VVEC]],
+; GFX9: buffer_store_short [[VVEC]],
 define amdgpu_kernel void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> addrspace(4)* %vec.ptr) #0 {
   %vec = load <2 x i16>, <2 x i16> addrspace(4)* %vec.ptr
   %p0 = extractelement <2 x i16> %vec, i32 0