diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -231,10 +231,8 @@ // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the // three offsets (voffset, soffset and instoffset) into the SDValue[3] array // pointed to by Offsets. - /// \returns 0 If there is a non-constant offset or if the offset is 0. - /// Otherwise returns the constant offset. - unsigned setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, - SDValue *Offsets, Align Alignment = Align(4)) const; + void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG, + SDValue *Offsets, Align Alignment = Align(4)) const; // Handle 8 bit and 16 bit buffer loads SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6795,28 +6795,29 @@ } } -// This function computes an appropriate offset to pass to -// MachineMemOperand::setOffset() based on the offset inputs to -// an intrinsic. If any of the offsets are non-contstant or -// if VIndex is non-zero then this function returns 0. Otherwise, -// it returns the sum of VOffset, SOffset, and Offset. -static unsigned getBufferOffsetForMMO(SDValue VOffset, - SDValue SOffset, - SDValue Offset, - SDValue VIndex = SDValue()) { - +/// Update \p MMO based on the offset inputs to an intrinsic. +static void updateBufferMMO(MachineMemOperand *MMO, SDValue VOffset, + SDValue SOffset, SDValue Offset, + SDValue VIndex = SDValue()) { if (!isa(VOffset) || !isa(SOffset) || - !isa(Offset)) - return 0; + !isa(Offset)) { + // The combined offset is not known to be constant, so we cannot represent + // it in the MMO. Give up. + MMO->setValue((Value *)nullptr); + return; + } - if (VIndex) { - if (!isa(VIndex) || !cast(VIndex)->isNullValue()) - return 0; + if (VIndex && (!isa(VIndex) || + !cast(VIndex)->isNullValue())) { + // The strided index component of the address is not known to be zero, so we + // cannot represent it in the MMO. Give up. + MMO->setValue((Value *)nullptr); + return; } - return cast(VOffset)->getSExtValue() + - cast(SOffset)->getSExtValue() + - cast(Offset)->getSExtValue(); + MMO->setOffset(cast(VOffset)->getSExtValue() + + cast(SOffset)->getSExtValue() + + cast(Offset)->getSExtValue()); } SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op, @@ -6839,13 +6840,21 @@ }; auto *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); + updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]); EVT MemVT = VData.getValueType(); return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT, M->getMemOperand()); } +// Return a value to use for the idxen operand by examining the vindex operand. +static unsigned getIdxEn(SDValue VIndex) { + if (auto VIndexC = dyn_cast(VIndex)) + // No need to set idxen if vindex is known to be zero. + return VIndexC->getZExtValue() != 0; + return 1; +} + SDValue SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG, unsigned NewOpcode) const { @@ -6866,8 +6875,7 @@ }; auto *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], - Ops[3])); + updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]); EVT MemVT = VData.getValueType(); return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT, @@ -6980,9 +6988,7 @@ case Intrinsic::amdgcn_buffer_load_format: { unsigned Glc = cast(Op.getOperand(5))->getZExtValue(); unsigned Slc = cast(Op.getOperand(6))->getZExtValue(); - unsigned IdxEn = 1; - if (auto Idx = dyn_cast(Op.getOperand(3))) - IdxEn = Idx->getZExtValue() != 0; + unsigned IdxEn = getIdxEn(Op.getOperand(3)); SDValue Ops[] = { Op.getOperand(0), // Chain Op.getOperand(2), // rsrc @@ -6993,11 +6999,7 @@ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - - unsigned Offset = setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); - // We don't know the offset if vindex is non-zero, so clear it. - if (IdxEn) - Offset = 0; + setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]); unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ? AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT; @@ -7005,7 +7007,7 @@ EVT VT = Op.getValueType(); EVT IntVT = VT.changeTypeToInteger(); auto *M = cast(Op); - M->getMemOperand()->setOffset(Offset); + updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]); EVT LoadVT = Op.getValueType(); if (LoadVT.getScalarType() == MVT::f16) @@ -7037,7 +7039,7 @@ }; auto *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5])); + updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5]); return lowerIntrinsicLoad(M, IsFormat, DAG, Ops); } case Intrinsic::amdgcn_struct_buffer_load: @@ -7057,8 +7059,7 @@ }; auto *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[3], Ops[4], Ops[5], - Ops[2])); + updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]); return lowerIntrinsicLoad(cast(Op), IsFormat, DAG, Ops); } case Intrinsic::amdgcn_tbuffer_load: { @@ -7069,9 +7070,7 @@ unsigned Nfmt = cast(Op.getOperand(8))->getZExtValue(); unsigned Glc = cast(Op.getOperand(9))->getZExtValue(); unsigned Slc = cast(Op.getOperand(10))->getZExtValue(); - unsigned IdxEn = 1; - if (auto Idx = dyn_cast(Op.getOperand(3))) - IdxEn = Idx->getZExtValue() != 0; + unsigned IdxEn = getIdxEn(Op.getOperand(3)); SDValue Ops[] = { Op.getOperand(0), // Chain Op.getOperand(2), // rsrc @@ -7152,9 +7151,7 @@ case Intrinsic::amdgcn_buffer_atomic_xor: case Intrinsic::amdgcn_buffer_atomic_fadd: { unsigned Slc = cast(Op.getOperand(6))->getZExtValue(); - unsigned IdxEn = 1; - if (auto Idx = dyn_cast(Op.getOperand(4))) - IdxEn = Idx->getZExtValue() != 0; + unsigned IdxEn = getIdxEn(Op.getOperand(4)); SDValue Ops[] = { Op.getOperand(0), // Chain Op.getOperand(2), // vdata @@ -7166,14 +7163,12 @@ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); - // We don't know the offset if vindex is non-zero, so clear it. - if (IdxEn) - Offset = 0; + setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + EVT VT = Op.getValueType(); auto *M = cast(Op); - M->getMemOperand()->setOffset(Offset); + updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]); unsigned Opcode = 0; switch (IntrID) { @@ -7296,9 +7291,7 @@ case Intrinsic::amdgcn_buffer_atomic_cmpswap: { unsigned Slc = cast(Op.getOperand(7))->getZExtValue(); - unsigned IdxEn = 1; - if (auto Idx = dyn_cast(Op.getOperand(5))) - IdxEn = Idx->getZExtValue() != 0; + unsigned IdxEn = getIdxEn(Op.getOperand(5)); SDValue Ops[] = { Op.getOperand(0), // Chain Op.getOperand(2), // src @@ -7311,13 +7304,11 @@ DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - unsigned Offset = setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); - // We don't know the offset if vindex is non-zero, so clear it. - if (IdxEn) - Offset = 0; + setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]); + EVT VT = Op.getValueType(); auto *M = cast(Op); - M->getMemOperand()->setOffset(Offset); + updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -7338,7 +7329,7 @@ }; EVT VT = Op.getValueType(); auto *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7])); + updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7]); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -7359,8 +7350,7 @@ }; EVT VT = Op.getValueType(); auto *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[5], Ops[6], Ops[7], - Ops[4])); + updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]); return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL, Op->getVTList(), Ops, VT, M->getMemOperand()); @@ -7657,9 +7647,7 @@ unsigned Nfmt = cast(Op.getOperand(9))->getZExtValue(); unsigned Glc = cast(Op.getOperand(10))->getZExtValue(); unsigned Slc = cast(Op.getOperand(11))->getZExtValue(); - unsigned IdxEn = 1; - if (auto Idx = dyn_cast(Op.getOperand(4))) - IdxEn = Idx->getZExtValue() != 0; + unsigned IdxEn = getIdxEn(Op.getOperand(4)); SDValue Ops[] = { Chain, VData, // vdata @@ -7737,9 +7725,7 @@ VData = handleD16VData(VData, DAG); unsigned Glc = cast(Op.getOperand(6))->getZExtValue(); unsigned Slc = cast(Op.getOperand(7))->getZExtValue(); - unsigned IdxEn = 1; - if (auto Idx = dyn_cast(Op.getOperand(4))) - IdxEn = Idx->getZExtValue() != 0; + unsigned IdxEn = getIdxEn(Op.getOperand(4)); SDValue Ops[] = { Chain, VData, @@ -7751,15 +7737,13 @@ DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen }; - unsigned Offset = setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); - // We don't know the offset if vindex is non-zero, so clear it. - if (IdxEn) - Offset = 0; + setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]); + unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ? AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); - M->getMemOperand()->setOffset(Offset); + updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); @@ -7806,7 +7790,7 @@ IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6])); + updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32) @@ -7853,8 +7837,7 @@ AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT; Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc; MemSDNode *M = cast(Op); - M->getMemOperand()->setOffset(getBufferOffsetForMMO(Ops[4], Ops[5], Ops[6], - Ops[3])); + updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]); // Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics EVT VDataType = VData.getValueType().getScalarType(); @@ -7934,9 +7917,9 @@ // Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the // three offsets (voffset, soffset and instoffset) into the SDValue[3] array // pointed to by Offsets. -unsigned SITargetLowering::setBufferOffsets(SDValue CombinedOffset, - SelectionDAG &DAG, SDValue *Offsets, - Align Alignment) const { +void SITargetLowering::setBufferOffsets(SDValue CombinedOffset, + SelectionDAG &DAG, SDValue *Offsets, + Align Alignment) const { SDLoc DL(CombinedOffset); if (auto C = dyn_cast(CombinedOffset)) { uint32_t Imm = C->getZExtValue(); @@ -7946,7 +7929,7 @@ Offsets[0] = DAG.getConstant(0, DL, MVT::i32); Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); - return SOffset + ImmOffset; + return; } } if (DAG.isBaseWithConstantOffset(CombinedOffset)) { @@ -7959,13 +7942,12 @@ Offsets[0] = N0; Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32); Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32); - return 0; + return; } } Offsets[0] = CombinedOffset; Offsets[1] = DAG.getConstant(0, DL, MVT::i32); Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32); - return 0; } // Handle 8 bit and 16 bit buffer loads diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll @@ -12,29 +12,29 @@ ; GCN: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM killed [[REG_SEQUENCE]], 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg0, addrspace 6) ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 16, align 1, addrspace 4) ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 32, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 48, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 64, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 64, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 80, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF @@ -42,50 +42,50 @@ ; GCN: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[DEF]] ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 ; GCN: [[DEF1:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[DEF1]] ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]].sub0 ; GCN: [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[DEF2]] ; GCN: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY6]].sub0 ; GCN: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 96, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[DEF3]] ; GCN: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY8]].sub0 ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec ; GCN: BUFFER_ATOMIC_ADD_F32_OFFSET [[V_MOV_B32_e32_1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 112, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_OFFEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_F32_IDXEN [[V_MOV_B32_e32_1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 112, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) ; GCN: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 64 ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_1]], 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) ; GCN: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 128 ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 128, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_2]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY10]], 128, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) ; GCN: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 72 ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_3]], 72, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) ; GCN: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 144 ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 144, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_4]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_OFFSET [[S_LOAD_DWORDX4_IMM]], [[COPY11]], 144, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) ; GCN: [[S_MOV_B32_5:%[0-9]+]]:sreg_32 = S_MOV_B32 80 ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_5]], 80, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) ; GCN: [[S_MOV_B32_6:%[0-9]+]]:sreg_32 = S_MOV_B32 160 ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 160, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_OFFSET [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[COPY12]], 160, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 176, align 1, addrspace 4) @@ -102,12 +102,12 @@ ; GCN: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[DEF6]] ; GCN: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0 ; GCN: [[DEF7:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY19:%[0-9]+]]:vreg_64 = COPY [[DEF7]] ; GCN: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY19]].sub0 ; GCN: [[COPY21:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_OFFSET [[REG_SEQUENCE1]], [[S_LOAD_DWORDX4_IMM]], [[COPY21]], 176, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY22:%[0-9]+]]:vreg_64 = COPY [[DEF8]] ; GCN: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY22]].sub0 ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ @@ -116,18 +116,18 @@ ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_9]], 96, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) ; GCN: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 192 ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 192, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFEN_exact killed [[BUFFER_LOAD_DWORDX4_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_10]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: [[COPY24:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_OFFSET_exact killed [[BUFFER_LOAD_DWORDX4_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY24]], 192, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET1]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) ; GCN: [[S_MOV_B32_11:%[0-9]+]]:sreg_32 = S_MOV_B32 104 ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET2]], [[S_LOAD_DWORDX4_IMM]], killed [[S_MOV_B32_11]], 104, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) ; GCN: [[S_MOV_B32_12:%[0-9]+]]:sreg_32 = S_MOV_B32 208 ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET3]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 208, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFEN1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_12]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: [[COPY25:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed [[BUFFER_LOAD_FORMAT_XYZW_OFFSET4]], [[S_LOAD_DWORDX4_IMM]], [[COPY25]], 208, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY26]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) @@ -138,12 +138,12 @@ ; GCN: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY28]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 224, align 1, addrspace 4) ; GCN: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[COPY]], %subreg.sub1 - ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_14]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY30:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY29]], [[S_LOAD_DWORDX4_IMM]], [[COPY30]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 224, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY31]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) @@ -153,12 +153,12 @@ ; GCN: [[S_MOV_B32_16:%[0-9]+]]:sreg_32 = S_MOV_B32 240 ; GCN: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY33]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource" + 240, align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_BOTHEN [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_16]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) - ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY34]], [[S_LOAD_DWORDX4_IMM]], [[COPY35]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 240, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY36]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) @@ -167,12 +167,12 @@ ; GCN: [[S_MOV_B32_17:%[0-9]+]]:sreg_32 = S_MOV_B32 256 ; GCN: [[COPY38:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY38]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource" + 256, align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_BOTHEN [[COPY]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_17]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY39:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY40:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY39]], [[S_LOAD_DWORDX4_IMM]], [[COPY40]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_ADD_IDXEN [[COPY]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 256, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY41:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[DEF9:%[0-9]+]]:vreg_64 = IMPLICIT_DEF @@ -192,21 +192,21 @@ ; GCN: [[COPY48:%[0-9]+]]:vreg_64 = COPY [[DEF11]] ; GCN: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[COPY48]].sub0 ; GCN: [[DEF12:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_BOTHEN [[REG_SEQUENCE1]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_19]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY50:%[0-9]+]]:vreg_64 = COPY [[DEF12]] ; GCN: [[COPY51:%[0-9]+]]:vgpr_32 = COPY [[COPY50]].sub0 ; GCN: [[COPY52:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[COPY]] ; GCN: [[DEF13:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY52]], [[S_LOAD_DWORDX4_IMM]], [[COPY53]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY54:%[0-9]+]]:vreg_64 = COPY [[DEF13]] ; GCN: [[COPY55:%[0-9]+]]:vgpr_32 = COPY [[COPY54]].sub0 ; GCN: [[DEF14:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY56:%[0-9]+]]:vreg_64 = COPY [[DEF14]] ; GCN: [[COPY57:%[0-9]+]]:vgpr_32 = COPY [[COPY56]].sub0 ; GCN: [[DEF15:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32) on custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_ATOMIC_CMPSWAP_IDXEN [[REG_SEQUENCE1]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 272, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 4) ; GCN: [[COPY58:%[0-9]+]]:vreg_64 = COPY [[DEF15]] ; GCN: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[COPY58]].sub0 ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ @@ -217,12 +217,12 @@ ; GCN: [[S_MOV_B32_20:%[0-9]+]]:sreg_32 = S_MOV_B32 288 ; GCN: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN4]], [[COPY62]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 288, align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_BOTHEN_exact killed [[BUFFER_LOAD_DWORDX4_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_20]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: [[COPY63:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN5]], [[COPY63]], [[S_LOAD_DWORDX4_IMM]], [[COPY64]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX4_IDXEN_exact killed [[BUFFER_LOAD_DWORDX4_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 288, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: INLINEASM &"", 1 /* sideeffect attdialect */ ; GCN: [[COPY65:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN2]], [[COPY65]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) @@ -232,12 +232,12 @@ ; GCN: [[S_MOV_B32_22:%[0-9]+]]:sreg_32 = S_MOV_B32 304 ; GCN: [[COPY67:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN4]], [[COPY67]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource" + 304, align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_BOTHEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_BOTHEN]], [[REG_SEQUENCE2]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_22]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: [[COPY68:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GCN: [[COPY69:%[0-9]+]]:sreg_32 = COPY [[COPY]] - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) - ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN5]], [[COPY68]], [[S_LOAD_DWORDX4_IMM]], [[COPY69]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN6]], [[V_MOV_B32_e32_]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) + ; GCN: BUFFER_STORE_FORMAT_XYZW_IDXEN_exact killed [[BUFFER_LOAD_FORMAT_XYZW_IDXEN7]], [[COPY]], [[S_LOAD_DWORDX4_IMM]], [[S_MOV_B32_]], 304, 0, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 4) ; GCN: S_ENDPGM 0 bb.0: %tmp0 = load <4 x i32>, <4 x i32> addrspace(6)* %arg0, align 16, !invariant.load !0 diff --git a/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll b/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll --- a/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll @@ -44,10 +44,25 @@ ret void } +; GCN-LABEL: {{^}}test1: +; GCN: buffer_store_dword +; GCN: buffer_load_dword +; GCN: buffer_store_dword +define amdgpu_cs void @test1(<4 x i32> inreg %buf, i32 %off) { +.entry: + call void @llvm.amdgcn.raw.buffer.store.i32(i32 0, <4 x i32> %buf, i32 8, i32 0, i32 0) + %val = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %buf, i32 %off, i32 0, i32 0) + call void @llvm.amdgcn.raw.buffer.store.i32(i32 %val, <4 x i32> %buf, i32 0, i32 0, i32 0) + ret void +} + declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2 declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #3 +declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2 + +declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #3 + attributes #2 = { nounwind readonly } attributes #3 = { nounwind writeonly } - diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll --- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll +++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll @@ -12,7 +12,7 @@ ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]] ; GCN: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "BufferResource", align 1, addrspace 4) + ; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 1, addrspace 4) ; GCN: [[COPY1:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 @@ -21,7 +21,7 @@ ; GCN: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] ; GCN: [[DEF3:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96) into custom "BufferResource", align 1, addrspace 4) + ; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable store (s96), align 1, addrspace 4) ; GCN: S_ENDPGM 0 main_body: %tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0)