Skip to content

Commit 35484c9

Browse files
author
Tim Renouf
committedAug 21, 2018
[AMDGPU] New tbuffer intrinsics
Summary: This commit adds new intrinsics llvm.amdgcn.raw.tbuffer.load llvm.amdgcn.struct.tbuffer.load llvm.amdgcn.raw.tbuffer.store llvm.amdgcn.struct.tbuffer.store with the following changes from the llvm.amdgcn.tbuffer.* intrinsics: * there are separate raw and struct versions: raw does not have an index arg and sets idxen=0 in the instruction, and struct always sets idxen=1 in the instruction even if the index is 0, to allow for the fact that gfx9 does bounds checking differently depending on whether idxen is set; * there is a combined format arg (dfmt+nfmt) * there is a combined cachepolicy arg (glc+slc) * there are now only two offset args: one for the offset that is included in bounds checking and swizzling, to be split between the instruction's voffset and immoffset fields, and one for the offset that is excluded from bounds checking and swizzling, to go into the instruction's soffset field. The AMDISD::TBUFFER_* SD nodes always have an index operand, all three offset operands, combined format operand, combined cachepolicy operand, and an extra idxen operand. The tbuffer pseudo- and real instructions now also have a combined format operand. The obsolescent llvm.amdgcn.tbuffer.* and llvm.SI.tbuffer.store intrinsics continue to work. V2: Separate raw and struct intrinsics. V3: Moved extract_glc and extract_slc defs to a more sensible place. V4: Rebased on D49995. V5: Only two separate offset args instead of three. V6: Pseudo- and real instructions have joint format operand. V7: Restored optionality of dfmt and nfmt in assembler. V8: Addressed minor review comments. Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D49026 Change-Id: If22ad77e349fac3a5d2f72dda53c010377d470d4 llvm-svn: 340268
1 parent d378a39 commit 35484c9

18 files changed

+971
-106
lines changed
 

‎llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,6 +815,7 @@ class AMDGPUBufferStore : Intrinsic <
815815
def int_amdgcn_buffer_store_format : AMDGPUBufferStore;
816816
def int_amdgcn_buffer_store : AMDGPUBufferStore;
817817

818+
// Obsolescent tbuffer intrinsics.
818819
def int_amdgcn_tbuffer_load : Intrinsic <
819820
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
820821
[llvm_v4i32_ty, // rsrc(SGPR)
@@ -844,6 +845,54 @@ def int_amdgcn_tbuffer_store : Intrinsic <
844845
[IntrWriteMem], "", [SDNPMemOperand]>,
845846
AMDGPURsrcIntrinsic<1>;
846847

848+
// New tbuffer intrinsics, with:
849+
// - raw and struct variants
850+
// - joint format field
851+
// - joint cachepolicy field
852+
def int_amdgcn_raw_tbuffer_load : Intrinsic <
853+
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
854+
[llvm_v4i32_ty, // rsrc(SGPR)
855+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
856+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
857+
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
858+
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
859+
[IntrReadMem], "", [SDNPMemOperand]>,
860+
AMDGPURsrcIntrinsic<0>;
861+
862+
def int_amdgcn_raw_tbuffer_store : Intrinsic <
863+
[],
864+
[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
865+
llvm_v4i32_ty, // rsrc(SGPR)
866+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
867+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
868+
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
869+
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
870+
[IntrWriteMem], "", [SDNPMemOperand]>,
871+
AMDGPURsrcIntrinsic<1>;
872+
873+
def int_amdgcn_struct_tbuffer_load : Intrinsic <
874+
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
875+
[llvm_v4i32_ty, // rsrc(SGPR)
876+
llvm_i32_ty, // vindex(VGPR)
877+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
878+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
879+
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
880+
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
881+
[IntrReadMem], "", [SDNPMemOperand]>,
882+
AMDGPURsrcIntrinsic<0>;
883+
884+
def int_amdgcn_struct_tbuffer_store : Intrinsic <
885+
[],
886+
[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
887+
llvm_v4i32_ty, // rsrc(SGPR)
888+
llvm_i32_ty, // vindex(VGPR)
889+
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
890+
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
891+
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
892+
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc)
893+
[IntrWriteMem], "", [SDNPMemOperand]>,
894+
AMDGPURsrcIntrinsic<1>;
895+
847896
class AMDGPUBufferAtomic : Intrinsic <
848897
[llvm_i32_ty],
849898
[llvm_i32_ty, // vdata(VGPR)

‎llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
161161
ImmTyExpTgt,
162162
ImmTyExpCompr,
163163
ImmTyExpVM,
164-
ImmTyDFMT,
165-
ImmTyNFMT,
164+
ImmTyFORMAT,
166165
ImmTyHwreg,
167166
ImmTyOff,
168167
ImmTySendMsg,
@@ -312,8 +311,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
312311
bool isSLC() const { return isImmTy(ImmTySLC); }
313312
bool isTFE() const { return isImmTy(ImmTyTFE); }
314313
bool isD16() const { return isImmTy(ImmTyD16); }
315-
bool isDFMT() const { return isImmTy(ImmTyDFMT) && isUInt<8>(getImm()); }
316-
bool isNFMT() const { return isImmTy(ImmTyNFMT) && isUInt<8>(getImm()); }
314+
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
317315
bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
318316
bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
319317
bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
@@ -666,8 +664,7 @@ class AMDGPUOperand : public MCParsedAsmOperand {
666664
case ImmTySLC: OS << "SLC"; break;
667665
case ImmTyTFE: OS << "TFE"; break;
668666
case ImmTyD16: OS << "D16"; break;
669-
case ImmTyDFMT: OS << "DFMT"; break;
670-
case ImmTyNFMT: OS << "NFMT"; break;
667+
case ImmTyFORMAT: OS << "FORMAT"; break;
671668
case ImmTyClampSI: OS << "ClampSI"; break;
672669
case ImmTyOModSI: OS << "OModSI"; break;
673670
case ImmTyDppCtrl: OS << "DppCtrl"; break;
@@ -1061,6 +1058,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
10611058
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
10621059
OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
10631060
OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1061+
OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
10641062

10651063
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
10661064
void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
@@ -3522,6 +3520,53 @@ AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
35223520
return MatchOperand_Success;
35233521
}
35243522

3523+
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
3524+
// values to live in a joint format operand in the MCInst encoding.
3525+
OperandMatchResultTy
3526+
AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
3527+
SMLoc S = Parser.getTok().getLoc();
3528+
int64_t Dfmt = 0, Nfmt = 0;
3529+
// dfmt and nfmt can appear in either order, and each is optional.
3530+
bool GotDfmt = false, GotNfmt = false;
3531+
while (!GotDfmt || !GotNfmt) {
3532+
if (!GotDfmt) {
3533+
auto Res = parseIntWithPrefix("dfmt", Dfmt);
3534+
if (Res != MatchOperand_NoMatch) {
3535+
if (Res != MatchOperand_Success)
3536+
return Res;
3537+
if (Dfmt >= 16) {
3538+
Error(Parser.getTok().getLoc(), "out of range dfmt");
3539+
return MatchOperand_ParseFail;
3540+
}
3541+
GotDfmt = true;
3542+
Parser.Lex();
3543+
continue;
3544+
}
3545+
}
3546+
if (!GotNfmt) {
3547+
auto Res = parseIntWithPrefix("nfmt", Nfmt);
3548+
if (Res != MatchOperand_NoMatch) {
3549+
if (Res != MatchOperand_Success)
3550+
return Res;
3551+
if (Nfmt >= 8) {
3552+
Error(Parser.getTok().getLoc(), "out of range nfmt");
3553+
return MatchOperand_ParseFail;
3554+
}
3555+
GotNfmt = true;
3556+
Parser.Lex();
3557+
continue;
3558+
}
3559+
}
3560+
break;
3561+
}
3562+
if (!GotDfmt && !GotNfmt)
3563+
return MatchOperand_NoMatch;
3564+
auto Format = Dfmt | Nfmt << 4;
3565+
Operands.push_back(
3566+
AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
3567+
return MatchOperand_Success;
3568+
}
3569+
35253570
//===----------------------------------------------------------------------===//
35263571
// ds
35273572
//===----------------------------------------------------------------------===//
@@ -4617,8 +4662,7 @@ void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
46174662

46184663
addOptionalImmOperand(Inst, Operands, OptionalIdx,
46194664
AMDGPUOperand::ImmTyOffset);
4620-
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDFMT);
4621-
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyNFMT);
4665+
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
46224666
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
46234667
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
46244668
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
@@ -4761,8 +4805,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
47614805
{"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
47624806
{"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
47634807
{"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
4764-
{"dfmt", AMDGPUOperand::ImmTyDFMT, false, nullptr},
4765-
{"nfmt", AMDGPUOperand::ImmTyNFMT, false, nullptr},
4808+
{"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
47664809
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
47674810
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
47684811
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
@@ -4844,6 +4887,8 @@ OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands)
48444887
Op.Type == AMDGPUOperand::ImmTyNegHi) {
48454888
res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
48464889
Op.ConvertResult);
4890+
} else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
4891+
res = parseDfmtNfmt(Operands);
48474892
} else {
48484893
res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
48494894
}

0 commit comments

Comments
 (0)
Please sign in to comment.