Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -5818,55 +5818,6 @@ } return SDValue(); }; - case SIIntrinsic::SI_tbuffer_store: { - - // Extract vindex and voffset from vaddr as appropriate - const ConstantSDNode *OffEn = cast(Op.getOperand(10)); - const ConstantSDNode *IdxEn = cast(Op.getOperand(11)); - SDValue VAddr = Op.getOperand(5); - - SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); - - assert(!(OffEn->isOne() && IdxEn->isOne()) && - "Legacy intrinsic doesn't support both offset and index - use new version"); - - SDValue VIndex = IdxEn->isOne() ? VAddr : Zero; - SDValue VOffset = OffEn->isOne() ? VAddr : Zero; - - // Deal with the vec-3 case - const ConstantSDNode *NumChannels = cast(Op.getOperand(4)); - auto Opcode = NumChannels->getZExtValue() == 3 ? - AMDGPUISD::TBUFFER_STORE_FORMAT_X3 : AMDGPUISD::TBUFFER_STORE_FORMAT; - - unsigned Dfmt = cast(Op.getOperand(8))->getZExtValue(); - unsigned Nfmt = cast(Op.getOperand(9))->getZExtValue(); - unsigned Glc = cast(Op.getOperand(12))->getZExtValue(); - unsigned Slc = cast(Op.getOperand(13))->getZExtValue(); - SDValue Ops[] = { - Chain, - Op.getOperand(3), // vdata - Op.getOperand(2), // rsrc - VIndex, - VOffset, - Op.getOperand(6), // soffset - Op.getOperand(7), // inst_offset - DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format - DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy - DAG.getConstant(IdxEn->isOne(), DL, MVT::i1), // idxen - }; - - assert((cast(Op.getOperand(14)))->getZExtValue() == 0 && - "Value of tfe other than zero is unsupported"); - - EVT VT = Op.getOperand(3).getValueType(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo(), - MachineMemOperand::MOStore, - VT.getStoreSize(), 4); - return DAG.getMemIntrinsicNode(Opcode, DL, - Op->getVTList(), Ops, VT, MMO); - } - case Intrinsic::amdgcn_tbuffer_store: { SDValue VData = Op.getOperand(2); bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16); Index: lib/Target/AMDGPU/SIIntrinsics.td =================================================================== --- lib/Target/AMDGPU/SIIntrinsics.td +++ lib/Target/AMDGPU/SIIntrinsics.td @@ -16,22 +16,4 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; - // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed - def int_SI_tbuffer_store : Intrinsic < - [], - [llvm_anyint_ty, // rsrc(SGPR) - llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32 - llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW - llvm_i32_ty, // vaddr(VGPR) - llvm_i32_ty, // soffset(SGPR) - llvm_i32_ty, // inst_offset(imm) - llvm_i32_ty, // dfmt(imm) - llvm_i32_ty, // nfmt(imm) - llvm_i32_ty, // offen(imm) - llvm_i32_ty, // idxen(imm) - llvm_i32_ty, // glc(imm) - llvm_i32_ty, // slc(imm) - llvm_i32_ty], // tfe(imm) - []>; - } // End TargetPrefix = "SI", isTarget = 1 Index: test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.SI.tbuffer.store.ll +++ /dev/null @@ -1,75 +0,0 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s - -;CHECK-LABEL: {{^}}test1: -;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32 glc slc -define amdgpu_vs void @test1(i32 %a1, i32 %vaddr) { - %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 - call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata, - i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1, - i32 1, i32 0) - ret void -} - -;CHECK-LABEL: {{^}}test1_idx: -;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 idxen offset:32 glc slc -define amdgpu_vs void @test1_idx(i32 %a1, i32 %vaddr) { - %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 - call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata, - i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1, - i32 1, i32 0) - ret void -} - -;CHECK-LABEL: {{^}}test1_scalar_offset: -;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, {{s[0-9]+}} idxen offset:32 glc slc -define amdgpu_vs void @test1_scalar_offset(i32 %a1, i32 %vaddr, i32 inreg %soffset) { - %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 - call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata, - i32 4, i32 %vaddr, i32 %soffset, i32 32, i32 14, i32 4, i32 0, i32 1, i32 1, - i32 1, i32 0) - ret void -} - -;CHECK-LABEL: {{^}}test1_no_glc_slc: -;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:14, nfmt:4, 0 offen offset:32 -define amdgpu_vs void @test1_no_glc_slc(i32 %a1, i32 %vaddr) { - %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 - call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata, - i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 0, - i32 0, i32 0) - ret void -} - -;CHECK-LABEL: {{^}}test2: -;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:13, nfmt:4, 0 offen offset:24 glc slc -define amdgpu_vs void @test2(i32 %a1, i32 %vaddr) { - %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 - call void @llvm.SI.tbuffer.store.v4i32(<4 x i32> undef, <4 x i32> %vdata, - i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1, - i32 1, i32 0) - ret void -} - -;CHECK-LABEL: {{^}}test3: -;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:11, nfmt:4, 0 offen offset:16 glc slc -define amdgpu_vs void @test3(i32 %a1, i32 %vaddr) { - %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0 - call void @llvm.SI.tbuffer.store.v2i32(<4 x i32> undef, <2 x i32> %vdata, - i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1, - i32 1, i32 0) - ret void -} - -;CHECK-LABEL: {{^}}test4: -;CHECK: tbuffer_store_format_x {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, dfmt:4, nfmt:4, 0 offen offset:8 glc slc -define amdgpu_vs void @test4(i32 %vdata, i32 %vaddr) { - call void @llvm.SI.tbuffer.store.i32(<4 x i32> undef, i32 %vdata, - i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1, - i32 1, i32 0) - ret void -} - -declare void @llvm.SI.tbuffer.store.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -declare void @llvm.SI.tbuffer.store.v2i32(<4 x i32>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -declare void @llvm.SI.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)