Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -61,15 +61,18 @@ enum ImmTy { ImmTyNone, - ImmTyDSOffset0, - ImmTyDSOffset1, ImmTyGDS, + ImmTyOffen, + ImmTyIdxen, + ImmTyAddr64, ImmTyOffset, + ImmTyOffset0, + ImmTyOffset1, ImmTyGLC, ImmTySLC, ImmTyTFE, - ImmTyClamp, - ImmTyOMod, + ImmTyClampSI, + ImmTyOModSI, ImmTyDppCtrl, ImmTyDppRowMask, ImmTyDppBankMask, @@ -149,13 +152,6 @@ } } - bool defaultTokenHasSuffix() const { - StringRef Token(Tok.Data, Tok.Length); - - return Token.endswith("_e32") || Token.endswith("_e64") || - Token.endswith("_dpp"); - } - bool isToken() const override { return Kind == Token; } @@ -178,16 +174,6 @@ F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0); } - bool isDSOffset0() const { - assert(isImm()); - return Imm.Type == ImmTyDSOffset0; - } - - bool isDSOffset1() const { - assert(isImm()); - return Imm.Type == ImmTyDSOffset1; - } - int64_t getImm() const { return Imm.Val; } @@ -213,12 +199,12 @@ return isImm() && Imm.Type == ImmT; } - bool isClamp() const { - return isImmTy(ImmTyClamp); + bool isClampSI() const { + return isImmTy(ImmTyClampSI); } - bool isOMod() const { - return isImmTy(ImmTyOMod); + bool isOModSI() const { + return isImmTy(ImmTyOModSI); } bool isImmModifier() const { @@ -235,9 +221,15 @@ bool isLWE() const { return isImmTy(ImmTyLWE); } bool isMod() const { - return isClamp() || isOMod(); + return isClampSI() || isOModSI(); } + bool isOffen() const { return isImmTy(ImmTyOffen); } + bool isIdxen() const { return isImmTy(ImmTyIdxen); } + bool isAddr64() const { return isImmTy(ImmTyAddr64); } + bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } + bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } /* same 8-bit range as offset1 */ + bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } bool isGDS() 
const { return isImmTy(ImmTyGDS); } bool isGLC() const { return isImmTy(ImmTyGLC); } bool isSLC() const { return isImmTy(ImmTySLC); } @@ -347,16 +339,47 @@ return EndLoc; } + void printImmTy(raw_ostream& OS, ImmTy Type) const { + switch (Type) { + case ImmTyNone: OS << "None"; break; + case ImmTyGDS: OS << "GDS"; break; + case ImmTyOffen: OS << "Offen"; break; + case ImmTyIdxen: OS << "Idxen"; break; + case ImmTyAddr64: OS << "Addr64"; break; + case ImmTyOffset: OS << "Offset"; break; + case ImmTyOffset0: OS << "Offset0"; break; + case ImmTyOffset1: OS << "Offset1"; break; + case ImmTyGLC: OS << "GLC"; break; + case ImmTySLC: OS << "SLC"; break; + case ImmTyTFE: OS << "TFE"; break; + case ImmTyClampSI: OS << "ClampSI"; break; + case ImmTyOModSI: OS << "OModSI"; break; + case ImmTyDppCtrl: OS << "DppCtrl"; break; + case ImmTyDppRowMask: OS << "DppRowMask"; break; + case ImmTyDppBankMask: OS << "DppBankMask"; break; + case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; + case ImmTySdwaSel: OS << "SdwaSel"; break; + case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; + case ImmTyDMask: OS << "DMask"; break; + case ImmTyUNorm: OS << "UNorm"; break; + case ImmTyDA: OS << "DA"; break; + case ImmTyR128: OS << "R128"; break; + case ImmTyLWE: OS << "LWE"; break; + case ImmTyHwreg: OS << "Hwreg"; break; + } + } + void print(raw_ostream &OS) const override { switch (Kind) { case Register: OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>'; break; case Immediate: - if (Imm.Type != AMDGPUOperand::ImmTyNone) - OS << getImm(); - else - OS << '<' << getImm() << " mods: " << Imm.Modifiers << '>'; + OS << '<' << getImm(); + if (getImmTy() != ImmTyNone) { + OS << " type: "; printImmTy(OS, getImmTy()); + } + OS << " mods: " << Imm.Modifiers << '>'; break; case Token: OS << '\'' << getToken() << '\''; @@ -414,8 +437,6 @@ return Op; } - bool isDSOffset() const; - bool isDSOffset01() const; bool isSWaitCnt() const; bool isHwreg() const; bool isMubufOffset() const; @@ -521,43 +542,55 @@ SMLoc NameLoc, OperandVector &Operands) 
override; OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int, - int64_t Default = 0); + int64_t Default = 0, bool AddDefault = false); OperandMatchResultTy parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy = - AMDGPUOperand::ImmTyNone); + enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, + int64_t Default = 0, bool AddDefault = false, + bool (*ConvertResult)(int64_t&) = 0); OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands, enum AMDGPUOperand::ImmTy ImmTy = - AMDGPUOperand::ImmTyNone); + AMDGPUOperand::ImmTyNone, + bool AddDefault = false); OperandMatchResultTy parseOptionalOps( const ArrayRef<OptionalOperand> &OptionalOps, OperandVector &Operands); OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value); + OperandMatchResultTy parseOptionalOperand(OperandVector &Operands, const OptionalOperand& Op, bool AddDefault); + OperandMatchResultTy parseAMDGPUOperand(OperandVector &Operands, StringRef Name); void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); void cvtDS(MCInst &Inst, const OperandVector &Operands); - OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands); - OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands); - OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands); bool parseCnt(int64_t &IntVal); OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); - bool parseHwreg(int64_t &HwRegCode, int64_t &Offset, int64_t &Width); - OperandMatchResultTy parseHwregOp(OperandVector &Operands); + bool parseHwregOperand(int64_t &HwRegCode, int64_t &Offset, int64_t &Width); + OperandMatchResultTy parseHwreg(OperandVector &Operands); OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); - OperandMatchResultTy parseFlatOptionalOps(OperandVector &Operands); - OperandMatchResultTy parseFlatAtomicOptionalOps(OperandVector &Operands); void cvtFlat(MCInst &Inst, const OperandVector 
&Operands); void cvtFlatAtomic(MCInst &Inst, const OperandVector &Operands); void cvtMubuf(MCInst &Inst, const OperandVector &Operands); - OperandMatchResultTy parseOffset(OperandVector &Operands); - OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands); - OperandMatchResultTy parseGLC(OperandVector &Operands); - OperandMatchResultTy parseSLC(OperandVector &Operands); - OperandMatchResultTy parseTFE(OperandVector &Operands); + OperandMatchResultTy parseOModSI(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "omod"); } + OperandMatchResultTy parseClampSI(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "clamp"); } + OperandMatchResultTy parseSMRDOffset(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "smrd_offset"); } + OperandMatchResultTy parseSMRDLiteralOffset(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "smrd_literal_offset"); } + OperandMatchResultTy parseDPPCtrl(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "dpp_ctrl"); } + OperandMatchResultTy parseRowMask(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "row_mask"); } + OperandMatchResultTy parseBankMask(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "bank_mask"); } + OperandMatchResultTy parseBoundCtrl(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "bound_ctrl"); } + OperandMatchResultTy parseOffen(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "offen"); } + OperandMatchResultTy parseIdxen(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "idxen"); } + OperandMatchResultTy parseAddr64(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "addr64"); } + OperandMatchResultTy parseOffset(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "offset"); } + OperandMatchResultTy parseOffset0(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "offset0"); } + OperandMatchResultTy parseOffset1(OperandVector &Operands) { 
return parseAMDGPUOperand(Operands, "offset1"); } + OperandMatchResultTy parseGLC(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "glc"); } + OperandMatchResultTy parseSLC(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "slc"); } + OperandMatchResultTy parseTFE(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "tfe"); } + OperandMatchResultTy parseGDS(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "gds"); } OperandMatchResultTy parseDMask(OperandVector &Operands); OperandMatchResultTy parseUNorm(OperandVector &Operands); @@ -565,6 +598,8 @@ OperandMatchResultTy parseR128(OperandVector &Operands); OperandMatchResultTy parseLWE(OperandVector &Operands); + OperandMatchResultTy parseOModOperand(OperandVector &Operands); + void cvtId(MCInst &Inst, const OperandVector &Operands); void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands); void cvtVOP3_2_nomod(MCInst &Inst, const OperandVector &Operands); @@ -573,10 +608,8 @@ void cvtMIMG(MCInst &Inst, const OperandVector &Operands); void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); - OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands); - OperandMatchResultTy parseDPPCtrlOps(OperandVector &Operands); - OperandMatchResultTy parseDPPOptionalOps(OperandVector &Operands); + OperandMatchResultTy parseDPPCtrlOps(OperandVector &Operands, bool AddDefault); void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands); void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands); void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods); @@ -1109,8 +1142,8 @@ return true; if (Op.isImm() && Op.hasModifiers()) return true; - if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod || - Op.getImmTy() == AMDGPUOperand::ImmTyClamp)) + if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOModSI || + Op.getImmTy() == AMDGPUOperand::ImmTyClampSI)) return true; } return false; @@ -1213,12 +1246,19 @@ } 
Operands.push_back(std::move(R)); } else { - ResTy = parseVOP3OptionalOps(Operands); if (ResTy == MatchOperand_NoMatch) { const auto &Tok = Parser.getTok(); Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(), Tok.getLoc())); Parser.Lex(); + if (getLexer().is(AsmToken::Colon)) { + Parser.Lex(); + if (getLexer().is(AsmToken::Identifier)) { + Parser.Lex(); + } + } + } else { + return ResTy; } } return MatchOperand_Success; @@ -1243,6 +1283,10 @@ // Add the instruction mnemonic Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc)); + + if (Name.endswith("_e64")) { Name = Name.substr(0, Name.size() - 4); } + if (Name.endswith("_e32")) { Name = Name.substr(0, Name.size() - 4); } + while (!getLexer().is(AsmToken::EndOfStatement)) { AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name); @@ -1268,7 +1312,7 @@ AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int, - int64_t Default) { + int64_t Default, bool AddDefault) { // We are at the end of the statement, and this is a default argument, so // use a default value. 
if (getLexer().is(AsmToken::EndOfStatement)) { @@ -1279,9 +1323,14 @@ switch(getLexer().getKind()) { default: return MatchOperand_NoMatch; case AsmToken::Identifier: { - StringRef OffsetName = Parser.getTok().getString(); - if (!OffsetName.equals(Prefix)) + StringRef Name = Parser.getTok().getString(); + if (!Name.equals(Prefix)) { + if (AddDefault) { + Int = Default; + return MatchOperand_Success; + } return MatchOperand_NoMatch; + } Parser.Lex(); if (getLexer().isNot(AsmToken::Colon)) @@ -1301,22 +1350,29 @@ AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy) { + enum AMDGPUOperand::ImmTy ImmTy, + int64_t Default, bool AddDefault, + bool (*ConvertResult)(int64_t&)) { SMLoc S = Parser.getTok().getLoc(); - int64_t Offset = 0; + int64_t Value = 0; - AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset); + AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value, Default, AddDefault); if (Res != MatchOperand_Success) return Res; - Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy)); + if (ConvertResult && !ConvertResult(Value)) { + return MatchOperand_ParseFail; + } + + Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy)); return MatchOperand_Success; } AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, - enum AMDGPUOperand::ImmTy ImmTy) { + enum AMDGPUOperand::ImmTy ImmTy, + bool AddDefault) { int64_t Bit = 0; SMLoc S = Parser.getTok().getLoc(); @@ -1333,7 +1389,11 @@ Bit = 0; Parser.Lex(); } else { - return MatchOperand_NoMatch; + if (AddDefault) { + Bit = 0; + } else { + return MatchOperand_NoMatch; + } } break; } @@ -1438,47 +1498,6 @@ // ds //===----------------------------------------------------------------------===// -static const OptionalOperand DSOptionalOps [] = { - {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, - 
{"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} -}; - -static const OptionalOperand DSOptionalOpsOff01 [] = { - {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr}, - {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr}, - {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr} -}; - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) { - return parseOptionalOps(DSOptionalOps, Operands); -} -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) { - return parseOptionalOps(DSOptionalOpsOff01, Operands); -} - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) { - SMLoc S = Parser.getTok().getLoc(); - AMDGPUAsmParser::OperandMatchResultTy Res = - parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); - if (Res == MatchOperand_NoMatch) { - Operands.push_back(AMDGPUOperand::CreateImm(0, S, - AMDGPUOperand::ImmTyOffset)); - Res = MatchOperand_Success; - } - return Res; -} - -bool AMDGPUOperand::isDSOffset() const { - return isImm() && isUInt<16>(getImm()); -} - -bool AMDGPUOperand::isDSOffset01() const { - return isImm() && isUInt<8>(getImm()); -} - void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, const OperandVector &Operands) { @@ -1497,8 +1516,8 @@ OptionalIdx[Op.getImmTy()] = i; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDSOffset0); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDSOffset1); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 @@ -1612,7 +1631,7 @@ return MatchOperand_Success; } -bool AMDGPUAsmParser::parseHwreg(int64_t &HwRegCode, int64_t &Offset, int64_t &Width) { +bool 
AMDGPUAsmParser::parseHwregOperand(int64_t &HwRegCode, int64_t &Offset, int64_t &Width) { if (Parser.getTok().getString() != "hwreg") return true; Parser.Lex(); @@ -1658,7 +1677,7 @@ } AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseHwregOp(OperandVector &Operands) { +AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { int64_t Imm16Val = 0; SMLoc S = Parser.getTok().getLoc(); @@ -1679,7 +1698,7 @@ int64_t HwRegCode = 0; int64_t Offset = 0; // default int64_t Width = 32; // default - if (parseHwreg(HwRegCode, Offset, Width)) + if (parseHwregOperand(HwRegCode, Offset, Width)) return MatchOperand_ParseFail; // HwRegCode (6) [5:0] // Offset (5) [10:6] @@ -1737,27 +1756,6 @@ // flat //===----------------------------------------------------------------------===// -static const OptionalOperand FlatOptionalOps [] = { - {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, - {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, - {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} -}; - -static const OptionalOperand FlatAtomicOptionalOps [] = { - {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, - {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr} -}; - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseFlatOptionalOps(OperandVector &Operands) { - return parseOptionalOps(FlatOptionalOps, Operands); -} - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseFlatAtomicOptionalOps(OperandVector &Operands) { - return parseOptionalOps(FlatAtomicOptionalOps, Operands); -} - void AMDGPUAsmParser::cvtFlat(MCInst &Inst, const OperandVector &Operands) { OptionalImmIndexMap OptionalIdx; @@ -1808,38 +1806,6 @@ // mubuf //===----------------------------------------------------------------------===// -static const OptionalOperand MubufOptionalOps [] = { - {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, - {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, - {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, - {"tfe", AMDGPUOperand::ImmTyTFE, 
true, 0, nullptr} -}; - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) { - return parseOptionalOps(MubufOptionalOps, Operands); -} - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseOffset(OperandVector &Operands) { - return parseIntWithPrefix("offset", Operands); -} - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseGLC(OperandVector &Operands) { - return parseNamedBit("glc", Operands); -} - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseSLC(OperandVector &Operands) { - return parseNamedBit("slc", Operands); -} - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseTFE(OperandVector &Operands) { - return parseNamedBit("tfe", Operands); -} - bool AMDGPUOperand::isMubufOffset() const { return isImmTy(ImmTyOffset) && isUInt<12>(getImm()); } @@ -1952,64 +1918,101 @@ return false; } -static const OptionalOperand VOP3OptionalOps [] = { - {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr}, - {"mul", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul}, - {"div", AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv}, -}; - -static bool isVOP3(OperandVector &Operands) { - if (operandsHaveModifiers(Operands)) +static bool ConvertBoundCtrl(int64_t &BoundCtrl) { + if (BoundCtrl == 0) { + BoundCtrl = 1; + return true; + } else if (BoundCtrl == -1) { + BoundCtrl = 0; return true; - - if (Operands.size() >= 2) { - AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]); - - if (DstOp.isRegClass(AMDGPU::SGPR_64RegClassID)) - return true; } + return false; +} - if (Operands.size() >= 5) - return true; +// Note: the order in this table matches the order of operands in AsmString. 
+static const OptionalOperand AMDGPUOperandTable[] = { + {"offen", AMDGPUOperand::ImmTyOffen, true, 0, nullptr}, + {"offset0", AMDGPUOperand::ImmTyOffset0, false, 0, nullptr}, + {"offset1", AMDGPUOperand::ImmTyOffset1, false, 0, nullptr}, + {"gds", AMDGPUOperand::ImmTyGDS, true, 0, nullptr}, + {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr}, + {"glc", AMDGPUOperand::ImmTyGLC, true, 0, nullptr}, + {"slc", AMDGPUOperand::ImmTySLC, true, 0, nullptr}, + {"tfe", AMDGPUOperand::ImmTyTFE, true, 0, nullptr}, + {"clamp", AMDGPUOperand::ImmTyClampSI, true, 0, nullptr}, + {"omod", AMDGPUOperand::ImmTyOModSI, false, 1, ConvertOmodMul}, + {"unorm", AMDGPUOperand::ImmTyUNorm, true, 0, nullptr}, + {"da", AMDGPUOperand::ImmTyDA, true, 0, nullptr}, + {"r128", AMDGPUOperand::ImmTyR128, true, 0, nullptr}, + {"lwe", AMDGPUOperand::ImmTyLWE, true, 0, nullptr}, + {"dmask", AMDGPUOperand::ImmTyDMask, false, 0, nullptr}, + {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, -1, nullptr}, + {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, 0xf, nullptr}, + {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, 0xf, nullptr}, + {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, -1, ConvertBoundCtrl}, +}; - if (Operands.size() > 3) { - AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]); - if (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) || - Src1Op.isRegClass(AMDGPU::SReg_64RegClassID)) - return true; +AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands, const OptionalOperand& Op, bool AddDefault) +{ + if (Op.IsBit) { + return parseNamedBit(Op.Name, Operands, Op.Type, AddDefault); + } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { + return parseDPPCtrlOps(Operands, AddDefault); + } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { + return parseOModOperand(Operands); + } else { + return parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.Default, AddDefault, Op.ConvertResult); } - return false; } 
-AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { - - // The value returned by this function may change after parsing - // an operand so store the original value here. - bool HasModifiers = operandsHaveModifiers(Operands); - - bool IsVOP3 = isVOP3(Operands); - if (HasModifiers || IsVOP3 || - getLexer().isNot(AsmToken::EndOfStatement) || - getForcedEncodingSize() == 64) { - - AMDGPUAsmParser::OperandMatchResultTy Res = - parseOptionalOps(VOP3OptionalOps, Operands); - - if (!HasModifiers && Res == MatchOperand_Success) { - // We have added a modifier operation, so we need to make sure all - // previous register operands have modifiers - for (unsigned i = 2, e = Operands.size(); i != e; ++i) { - AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]); - if ((Op.isReg() || Op.isImm()) && !Op.hasModifiers()) - Op.setModifiers(0); - } +AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseAMDGPUOperand(OperandVector &Operands, StringRef Name) +{ + StringRef Tok; + if (getLexer().isNot(AsmToken::EndOfStatement) && getLexer().getKind() == AsmToken::Identifier) { + Tok = Parser.getTok().getString(); + } + bool optional = false; + if (Tok == "mul" || Tok == "div") { optional = true; } + for (const OptionalOperand &Op1 : AMDGPUOperandTable) { + if (Op1.Name == Tok) { optional = true; break; } + } + // Attempt to parse current optional operand. + for (const OptionalOperand &Op : AMDGPUOperandTable) { + bool parseThis = + Name == "" || + (Op.Name == Name) || + (Name == "omod" && Op.Type == AMDGPUOperand::ImmTyOModSI); + if (parseThis && Tok == Name) { + // Exactly the expected token for optional operand. + // Parse it and add operand normally. + return parseOptionalOperand(Operands, Op, true); + } else if (parseThis) { + // Token for optional operand which is later in the table + // than the one we expect. 
If needed, add default value + // for the operand we expect, do not consume anything + // and return MatchOperand_NoMatch. Parsing will continue. + return parseOptionalOperand(Operands, Op, optional); + } else if (Op.Name == Tok) { + // This looks like optional operand, but we do not expect it. + // This is the case when AsmString has token in it. + return MatchOperand_NoMatch; } - return Res; } return MatchOperand_NoMatch; } +AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) +{ + StringRef Name = Parser.getTok().getString(); + if (Name == "mul") { + return parseIntWithPrefix("mul", Operands, AMDGPUOperand::ImmTyOModSI, 0, false, ConvertOmodMul); + } else if (Name == "div") { + return parseIntWithPrefix("div", Operands, AMDGPUOperand::ImmTyOModSI, 0, false, ConvertOmodDiv); + } else { + return MatchOperand_NoMatch; + } +} + void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) { unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); @@ -2060,8 +2063,8 @@ } } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOMod); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); } void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) { @@ -2090,11 +2093,11 @@ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128); addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyTFE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); } void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { @@ -2158,7 +2161,7 @@ } AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDPPCtrlOps(OperandVector &Operands) { +AMDGPUAsmParser::parseDPPCtrlOps(OperandVector &Operands, bool AddDefault) { SMLoc S = Parser.getTok().getLoc(); StringRef Prefix; int64_t Int; @@ -2184,7 +2187,12 @@ && Prefix != "wave_shr" && Prefix != "wave_ror" && Prefix != "row_bcast") { - return MatchOperand_NoMatch; + if (AddDefault) { + Operands.push_back(AMDGPUOperand::CreateImm(0, S, AMDGPUOperand::ImmTyDppCtrl)); + return MatchOperand_Success; + } else { + return MatchOperand_NoMatch; + } } Parser.Lex(); @@ -2269,30 +2277,6 @@ return MatchOperand_Success; } -static const OptionalOperand DPPOptionalOps [] = { - {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, 0xf, nullptr}, - {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, 0xf, nullptr}, - {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, -1, nullptr} -}; - -AMDGPUAsmParser::OperandMatchResultTy -AMDGPUAsmParser::parseDPPOptionalOps(OperandVector &Operands) { - SMLoc S = Parser.getTok().getLoc(); - OperandMatchResultTy Res = parseOptionalOps(DPPOptionalOps, Operands); - // XXX - sp3 use syntax "bound_ctrl:0" to indicate that bound_ctrl bit was set - if (Res == MatchOperand_Success) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back()); - // If last operand was parsed as bound_ctrl we should replace it with correct value (1) - if (Op.isImmTy(AMDGPUOperand::ImmTyDppBoundCtrl)) { - Operands.pop_back(); - Operands.push_back( - AMDGPUOperand::CreateImm(1, S, AMDGPUOperand::ImmTyDppBoundCtrl)); - return MatchOperand_Success; - } - } - return Res; -} - void 
AMDGPUAsmParser::cvtDPP_mod(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h @@ -45,9 +45,11 @@ void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSMRDOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -63,10 +65,10 @@ void printImmediate64(uint64_t I, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printDPPCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); - void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printDPPCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printRowMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); + 
void printBankMask(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBoundCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O); Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp =================================================================== --- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -94,7 +94,7 @@ } } -void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O) { uint16_t Imm = MI->getOperand(OpNo).getImm(); if (Imm != 0) { @@ -103,7 +103,7 @@ } } -void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (MI->getOperand(OpNo).getImm()) { O << " offset0:"; @@ -111,7 +111,7 @@ } } -void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O) { if (MI->getOperand(OpNo).getImm()) { O << " offset1:"; @@ -119,6 +119,16 @@ } } +void AMDGPUInstPrinter::printSMRDOffset(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printU32ImmOperand(MI, OpNo, O); +} + +void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printU32ImmOperand(MI, OpNo, O); +} + void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printNamedBit(MI, OpNo, O, "gds"); @@ -422,7 +432,7 @@ } -void AMDGPUInstPrinter::printDPPCtrlOperand(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); if (Imm <= 0x0ff) { @@ 
-461,19 +471,19 @@ } } -void AMDGPUInstPrinter::printRowMaskOperand(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << " row_mask:"; printU4ImmOperand(MI, OpNo, O); } -void AMDGPUInstPrinter::printBankMaskOperand(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo, raw_ostream &O) { O << " bank_mask:"; printU4ImmOperand(MI, OpNo, O); } -void AMDGPUInstPrinter::printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, +void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); if (Imm) { Index: lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.td +++ lib/Target/AMDGPU/SIInstrInfo.td @@ -461,322 +461,75 @@ include "SIInstrFormats.td" include "VIInstrFormats.td" -def MubufOffsetMatchClass : AsmOperandClass { - let Name = "MubufOffset"; - let ParserMethod = "parseMubufOptionalOps"; +class NamedMatchClass : AsmOperandClass { + let Name = "Imm"#CName; + let PredicateMethod = "is"#CName; + let ParserMethod = "parse"#CName; let RenderMethod = "addImmOperands"; - let IsOptional = 1; + let IsOptional = Optional; } -class DSOffsetBaseMatchClass : AsmOperandClass { - let Name = "DSOffset"#parser; - let ParserMethod = parser; - let RenderMethod = "addImmOperands"; - let PredicateMethod = "isDSOffset"; - let IsOptional = 1; -} - -def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">; -def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">; - -def DSOffset01MatchClass : AsmOperandClass { - let Name = "DSOffset1"; - let ParserMethod = "parseDSOff01OptionalOps"; - let RenderMethod = "addImmOperands"; - let PredicateMethod = "isDSOffset01"; - let IsOptional = 1; -} - -class GDSBaseMatchClass : AsmOperandClass { - let Name = "GDS"#parser; - let PredicateMethod = 
"isGDS"; - let ParserMethod = parser; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">; -def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">; - -class GLCBaseMatchClass : AsmOperandClass { - let Name = "GLC"#parser; - let PredicateMethod = "isGLC"; - let ParserMethod = parser; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -def GLCMubufMatchClass : GLCBaseMatchClass <"parseMubufOptionalOps">; -def GLCFlatMatchClass : GLCBaseMatchClass <"parseFlatOptionalOps">; - -class SLCBaseMatchClass : AsmOperandClass { - let Name = "SLC"#parser; - let PredicateMethod = "isSLC"; - let ParserMethod = parser; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -def SLCMubufMatchClass : SLCBaseMatchClass <"parseMubufOptionalOps">; -def SLCFlatMatchClass : SLCBaseMatchClass <"parseFlatOptionalOps">; -def SLCFlatAtomicMatchClass : SLCBaseMatchClass <"parseFlatAtomicOptionalOps">; - -class TFEBaseMatchClass : AsmOperandClass { - let Name = "TFE"#parser; - let PredicateMethod = "isTFE"; - let ParserMethod = parser; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -def TFEMubufMatchClass : TFEBaseMatchClass <"parseMubufOptionalOps">; -def TFEFlatMatchClass : TFEBaseMatchClass <"parseFlatOptionalOps">; -def TFEFlatAtomicMatchClass : TFEBaseMatchClass <"parseFlatAtomicOptionalOps">; - -def OModMatchClass : AsmOperandClass { - let Name = "OMod"; - let PredicateMethod = "isOMod"; - let ParserMethod = "parseVOP3OptionalOps"; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} +def sdwa_sel : NamedMatchClass<"SDWASel">; -def ClampMatchClass : AsmOperandClass { - let Name = "Clamp"; - let PredicateMethod = "isClamp"; - let ParserMethod = "parseVOP3OptionalOps"; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; +class NamedOperandBit : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; } - -class 
SMRDOffsetBaseMatchClass : AsmOperandClass { - let Name = "SMRDOffset"#predicate; - let PredicateMethod = predicate; - let RenderMethod = "addImmOperands"; + +class NamedOperandU8 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; } -def SMRDOffsetMatchClass : SMRDOffsetBaseMatchClass <"isSMRDOffset">; -def SMRDLiteralOffsetMatchClass : SMRDOffsetBaseMatchClass < - "isSMRDLiteralOffset" ->; - -def DPPCtrlMatchClass : AsmOperandClass { - let Name = "DPPCtrl"; - let PredicateMethod = "isDPPCtrl"; - let ParserMethod = "parseDPPCtrlOps"; - let RenderMethod = "addImmOperands"; - let IsOptional = 0; +class NamedOperandU16 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; } -class DPPOptionalMatchClass : AsmOperandClass { - let Name = "DPPOptional"#OpName; - let PredicateMethod = "is"#OpName; - let ParserMethod = "parseDPPOptionalOps"; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -def SDWASelMatchClass : AsmOperandClass { - let Name = "SDWASel"; - let PredicateMethod = "isSDWASel"; - let ParserMethod = "parseSDWASel"; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -def SDWADstUnusedMatchClass : AsmOperandClass { - let Name = "SDWADstUnused"; - let PredicateMethod = "isSDWADstUnused"; - let ParserMethod = "parseSDWADstUnused"; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -class OptionalImmAsmOperand : AsmOperandClass { - let Name = "Imm"#OpName; - let PredicateMethod = "isImm"; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -def DMaskMatchClass : AsmOperandClass { - let Name = "DMask"; - let PredicateMethod = "isDMask"; - let ParserMethod = "parseDMask"; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - -class NamedBitMatchClass : AsmOperandClass { - let Name = "Imm"#BitName; - let PredicateMethod = "is"#BitName; - let ParserMethod = "parse"#BitName; - let RenderMethod = "addImmOperands"; - let IsOptional = 1; -} - 
-class NamedBitOperand : Operand { - let PrintMethod = "print"#BitName; -} - -def HwregMatchClass : AsmOperandClass { - let Name = "Hwreg"; - let PredicateMethod = "isHwreg"; - let ParserMethod = "parseHwregOp"; - let RenderMethod = "addImmOperands"; +class NamedOperandU32 : Operand { + let PrintMethod = "print"#Name; + let ParserMatchClass = MatchClass; } let OperandType = "OPERAND_IMMEDIATE" in { -def offen : Operand { - let PrintMethod = "printOffen"; - let ParserMatchClass = OptionalImmAsmOperand<"offen">; -} -def idxen : Operand { - let PrintMethod = "printIdxen"; - let ParserMatchClass = OptionalImmAsmOperand<"idxen">; -} -def addr64 : Operand { - let PrintMethod = "printAddr64"; -} -def mbuf_offset : Operand { - let PrintMethod = "printMBUFOffset"; - let ParserMatchClass = MubufOffsetMatchClass; -} -class ds_offset_base : Operand { - let PrintMethod = "printDSOffset"; - let ParserMatchClass = mc; -} -def ds_offset : ds_offset_base ; -def ds_offset_gds : ds_offset_base ; +def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>; +def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>; +def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>; -def ds_offset0 : Operand { - let PrintMethod = "printDSOffset0"; - let ParserMatchClass = DSOffset01MatchClass; -} -def ds_offset1 : Operand { - let PrintMethod = "printDSOffset1"; - let ParserMatchClass = DSOffset01MatchClass; -} -class gds_base : Operand { - let PrintMethod = "printGDS"; - let ParserMatchClass = mc; -} -def gds : gds_base ; +def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>; +def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>; +def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>; -def gds01 : gds_base ; +def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>; -class glc_base : Operand { - let PrintMethod = "printGLC"; - let ParserMatchClass = mc; -} +def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>; +def clampmod 
: NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>; -def glc : glc_base ; -def glc_flat : glc_base ; +def smrd_offset : NamedOperandU32<"SMRDOffset", NamedMatchClass<"SMRDOffset">>; +def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset", NamedMatchClass<"SMRDLiteralOffset">>; -class slc_base : Operand { - let PrintMethod = "printSLC"; - let ParserMatchClass = mc; -} +def glc : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>; +def slc : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>; +def tfe : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>; +def unorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>; +def da : NamedOperandBit<"DA", NamedMatchClass<"DA">>; +def r128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>; +def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>; -def slc : slc_base ; -def slc_flat : slc_base ; -def slc_flat_atomic : slc_base ; +def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>; -class tfe_base : Operand { - let PrintMethod = "printTFE"; - let ParserMatchClass = mc; -} +def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>; +def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>; +def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>; +def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>; -def tfe : tfe_base ; -def tfe_flat : tfe_base ; -def tfe_flat_atomic : tfe_base ; +def dst_sel : NamedOperandU32<"SDWADstSel", sdwa_sel>; +def src0_sel : NamedOperandU32<"SDWASrc0Sel", sdwa_sel>; +def src1_sel : NamedOperandU32<"SDWASrc1Sel", sdwa_sel>; +def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>; -def omod : Operand { - let PrintMethod = "printOModSI"; - let ParserMatchClass = OModMatchClass; -} - -def ClampMod : Operand { - let PrintMethod = "printClampSI"; - let ParserMatchClass = ClampMatchClass; -} - -def smrd_offset : Operand { - let PrintMethod = "printU32ImmOperand"; - let ParserMatchClass = 
SMRDOffsetMatchClass; -} - -def smrd_literal_offset : Operand { - let PrintMethod = "printU32ImmOperand"; - let ParserMatchClass = SMRDLiteralOffsetMatchClass; -} - -def dmask : Operand { - let PrintMethod = "printDMask"; - let ParserMatchClass = DMaskMatchClass; -} - -def unorm : NamedBitOperand<"UNorm"> { - let ParserMatchClass = NamedBitMatchClass<"UNorm">; -} - -def da : NamedBitOperand<"DA"> { - let ParserMatchClass = NamedBitMatchClass<"DA">; -} - -def r128 : NamedBitOperand<"R128"> { - let ParserMatchClass = NamedBitMatchClass<"R128">; -} - -def lwe : NamedBitOperand<"LWE"> { - let ParserMatchClass = NamedBitMatchClass<"LWE">; -} - -def dpp_ctrl : Operand { - let PrintMethod = "printDPPCtrlOperand"; - let ParserMatchClass = DPPCtrlMatchClass; -} - -def row_mask : Operand { - let PrintMethod = "printRowMaskOperand"; - let ParserMatchClass = DPPOptionalMatchClass<"RowMask">; -} - -def bank_mask : Operand { - let PrintMethod = "printBankMaskOperand"; - let ParserMatchClass = DPPOptionalMatchClass<"BankMask">; -} - -def bound_ctrl : Operand { - let PrintMethod = "printBoundCtrlOperand"; - let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">; -} - -def dst_sel : Operand { - let PrintMethod = "printSDWADstSel"; - let ParserMatchClass = SDWASelMatchClass; -} - -def src0_sel : Operand { - let PrintMethod = "printSDWASrc0Sel"; - let ParserMatchClass = SDWASelMatchClass; -} - -def src1_sel : Operand { - let PrintMethod = "printSDWASrc1Sel"; - let ParserMatchClass = SDWASelMatchClass; -} - -def hwreg : Operand { - let PrintMethod = "printHwreg"; - let ParserMatchClass = HwregMatchClass; -} - -def dst_unused : Operand { - let PrintMethod = "printSDWADstUnused"; - let ParserMatchClass = SDWADstUnusedMatchClass; -} +def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg">>; } // End OperandType = "OPERAND_IMMEDIATE" @@ -1401,7 +1154,7 @@ !if (!eq(HasModifiers, 1), // VOP1 with modifiers (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, - ClampMod:$clamp, 
omod:$omod) + clampmod:$clamp, omod:$omod) /* else */, // VOP1 without modifiers (ins Src0RC:$src0) @@ -1411,7 +1164,7 @@ // VOP 2 with modifiers (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, InputModsNoDefault:$src1_modifiers, Src1RC:$src1, - ClampMod:$clamp, omod:$omod) + clampmod:$clamp, omod:$omod) /* else */, // VOP2 without modifiers (ins Src0RC:$src0, Src1RC:$src1) @@ -1422,7 +1175,7 @@ (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, InputModsNoDefault:$src1_modifiers, Src1RC:$src1, InputModsNoDefault:$src2_modifiers, Src2RC:$src2, - ClampMod:$clamp, omod:$omod) + clampmod:$clamp, omod:$omod) /* else */, // VOP3 without modifiers (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2) @@ -1472,7 +1225,7 @@ !if (!eq(HasModifiers, 1), // VOP1_SDWA with modifiers (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, - ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel) /* else */, // VOP1_SDWA without modifiers @@ -1484,7 +1237,7 @@ // VOP2_SDWA with modifiers (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0, InputModsNoDefault:$src1_modifiers, Src1RC:$src1, - ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel) /* else */, // VOP2_DPP without modifiers @@ -1545,7 +1298,7 @@ string args = !if(!eq(HasModifiers, 0), getAsm32<0, NumSrcArgs, DstVT>.ret, ", "#src0#src1); - string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl"; + string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; } class getAsmSDWA { @@ -1769,7 +1522,7 @@ let InsSDWA = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0, InputModsNoDefault:$src1_modifiers, Src1RC32:$src1, VGPR_32:$src2, // stub argument - ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, + clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); 
let Asm32 = getAsm32<1, 2, f32>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret; @@ -2548,7 +2301,7 @@ (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0, InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1, InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2, - ClampMod:$clamp, + clampmod:$clamp, omod:$omod), "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", [(set P.DstVT:$vdst, @@ -2668,7 +2421,7 @@ multiclass DS_1A_RET op, string opName, RegisterClass rc, dag outs = (outs rc:$vdst), - dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds), + dag ins = (ins VGPR_32:$addr, offset:$offset, gds:$gds), string asm = opName#" $vdst, $addr"#"$offset$gds"> { def "" : DS_Pseudo ; @@ -2681,8 +2434,8 @@ multiclass DS_1A_Off8_RET op, string opName, RegisterClass rc, dag outs = (outs rc:$vdst), - dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1, - gds01:$gds), + dag ins = (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, + gds:$gds), string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> { def "" : DS_Pseudo ; @@ -2695,7 +2448,7 @@ multiclass DS_1A1D_NORET op, string opName, RegisterClass rc, dag outs = (outs), - dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds), + dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds), string asm = opName#" $addr, $data0"#"$offset$gds"> { def "" : DS_Pseudo , @@ -2710,8 +2463,8 @@ multiclass DS_1A1D_Off8_NORET op, string opName, RegisterClass rc, dag outs = (outs), dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1, - ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds), - string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> { + offset0:$offset0, offset1:$offset1, gds:$gds), + string asm = opName#" $addr, $data0, $data1$offset0$offset1$gds"> { def "" : DS_Pseudo ; @@ -2724,7 +2477,7 @@ multiclass DS_1A1D_RET op, string opName, RegisterClass rc, string noRetOp = "", dag outs = (outs rc:$vdst), - dag ins = 
(ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds), + dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds), string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> { let hasPostISelHook = 1 in { @@ -2773,14 +2526,14 @@ string noRetOp = "", RegisterClass src = rc> : DS_1A2D_RET_m ; multiclass DS_1A2D_NORET op, string opName, RegisterClass rc, string noRetOp = opName, dag outs = (outs), dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1, - ds_offset:$offset, gds:$gds), + offset:$offset, gds:$gds), string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> { def "" : DS_Pseudo , @@ -2794,7 +2547,7 @@ multiclass DS_0A_RET op, string opName, dag outs = (outs VGPR_32:$vdst), - dag ins = (ins ds_offset:$offset, gds:$gds), + dag ins = (ins offset:$offset, gds:$gds), string asm = opName#" $vdst"#"$offset"#"$gds"> { let mayLoad = 1, mayStore = 1 in { @@ -2809,7 +2562,7 @@ multiclass DS_1A_RET_GDS op, string opName, dag outs = (outs VGPR_32:$vdst), - dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset), + dag ins = (ins VGPR_32:$addr, offset:$offset), string asm = opName#" $vdst, $addr"#"$offset gds"> { def "" : DS_Pseudo ; @@ -2835,7 +2588,7 @@ multiclass DS_1A op, string opName, dag outs = (outs), - dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds), + dag ins = (ins VGPR_32:$addr, offset:$offset, gds:$gds), string asm = opName#" $addr"#"$offset"#"$gds"> { let mayLoad = 1, mayStore = 1 in { @@ -3065,23 +2818,23 @@ defm _ADDR64 : MUBUFAtomicAddr64_m < op, name#"_addr64", (outs), (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0 + SCSrc_32:$soffset, offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$slc", [], 0 >; defm _OFFSET : MUBUFAtomicOffset_m < op, name#"_offset", (outs), - (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset, + (ins rc:$vdata, 
SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset, slc:$slc), - name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0 + name#" $vdata, off, $srsrc, $soffset$offset$slc", [], 0 >; let offen = 1, idxen = 0 in { defm _OFFEN : MUBUFAtomicOther_m < op, name#"_offen", (outs), (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, - mbuf_offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$slc", [], 0 + offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$slc", [], 0 >; } @@ -3089,8 +2842,8 @@ defm _IDXEN : MUBUFAtomicOther_m < op, name#"_idxen", (outs), (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, - mbuf_offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$slc", [], 0 + offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$slc", [], 0 >; } @@ -3098,8 +2851,8 @@ defm _BOTHEN : MUBUFAtomicOther_m < op, name#"_bothen", (outs), (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, - mbuf_offset:$offset, slc:$slc), - name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$slc", + offset:$offset, slc:$slc), + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$slc", [], 0 >; } @@ -3112,7 +2865,7 @@ defm _RTN_ADDR64 : MUBUFAtomicAddr64_m < op, name#"_rtn_addr64", (outs rc:$vdata), (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, - SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc), + SCSrc_32:$soffset, offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc", [(set vt:$vdata, (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, @@ -3122,8 +2875,8 @@ defm _RTN_OFFSET : MUBUFAtomicOffset_m < op, name#"_rtn_offset", (outs rc:$vdata), (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset, - mbuf_offset:$offset, slc:$slc), - name#" $vdata, $srsrc, $soffset"#"$offset"#" glc$slc", + offset:$offset, slc:$slc), + name#" $vdata, off, 
$srsrc, $soffset $offset glc$slc", [(set vt:$vdata, (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc), vt:$vdata_in))], 1 @@ -3133,7 +2886,7 @@ defm _RTN_OFFEN : MUBUFAtomicOther_m < op, name#"_rtn_offen", (outs rc:$vdata), (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, - mbuf_offset:$offset, slc:$slc), + offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#" glc"#"$slc", [], 1 >; @@ -3143,7 +2896,7 @@ defm _RTN_IDXEN : MUBUFAtomicOther_m < op, name#"_rtn_idxen", (outs rc:$vdata), (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, - mbuf_offset:$offset, slc:$slc), + offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#" glc"#"$slc", [], 1 >; @@ -3153,7 +2906,7 @@ defm _RTN_BOTHEN : MUBUFAtomicOther_m < op, name#"_rtn_bothen", (outs rc:$vdata), (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset, - mbuf_offset:$offset, slc:$slc), + offset:$offset, slc:$slc), name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#" glc"#"$slc", [], 1 >; @@ -3173,8 +2926,8 @@ let offen = 0, idxen = 0, vaddr = 0 in { defm _OFFSET : MUBUF_m ; @@ -3183,33 +2936,32 @@ let offen = 1, idxen = 0 in { defm _OFFEN : MUBUF_m ; + name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$glc$slc$tfe", []>; } let offen = 0, idxen = 1 in { defm _IDXEN : MUBUF_m ; + name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>; } let offen = 1, idxen = 1 in { defm _BOTHEN : MUBUF_m ; + offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>; } let offen = 0, idxen = 0 in { defm _ADDR64 : MUBUFAddr64_m { let mayLoad = 0, mayStore = 1 in { - defm : MUBUF_m ; - let offen = 0, idxen = 0, vaddr = 0 in { defm _OFFSET : MUBUF_m ; } // offen = 0, idxen = 0, vaddr = 0 @@ -3240,35 +2985,35 @@ let offen = 1, idxen = 0 in { defm _OFFEN : MUBUF_m ; + name#" $vdata, $vaddr, $srsrc, 
$soffset offen"# + "$offset$glc$slc$tfe", []>; } // end offen = 1, idxen = 0 let offen = 0, idxen = 1 in { defm _IDXEN : MUBUF_m ; + name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>; } let offen = 1, idxen = 1 in { defm _BOTHEN : MUBUF_m ; + offset:$offset, glc:$glc, slc:$slc, tfe:$tfe), + name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>; } let offen = 0, idxen = 0 in { defm _ADDR64 : MUBUFAddr64_m { + dag ins = (ins VReg_64:$addr, glc:$glc, slc:$slc, tfe:$tfe), + string asm = asm_name#" $vdst, $addr$glc$slc$tfe"> { let data = 0, mayLoad = 1 in { @@ -3354,9 +3099,9 @@ multiclass FLAT_Store_Helper { + dag ins = (ins VReg_64:$addr, vdataClass:$data, glc:$glc, + slc:$slc, tfe:$tfe), + string asm = asm_name#" $addr, $data$glc$slc$tfe"> { let mayLoad = 0, mayStore = 1, vdst = 0 in { @@ -3376,25 +3121,25 @@ let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0, AsmMatchConverter = "cvtFlatAtomic" in { def "" : FLAT_Pseudo , + slc:$slc, tfe:$tfe), []>, AtomicNoRet ; def _ci : FLAT_Real_ci ; def _vi : FLAT_Real_vi ; } let glc = 1, hasPostISelHook = 1, AsmMatchConverter = "cvtFlatAtomic" in { defm _RTN : FLAT_AtomicRet_m ; + (ins VReg_64:$addr, data_rc:$data, slc:$slc, + tfe:$tfe), + asm_name#" $vdst, $addr, $data glc$slc$tfe", []>; } } Index: test/CodeGen/AMDGPU/captured-frame-index.ll =================================================================== --- test/CodeGen/AMDGPU/captured-frame-index.ll +++ test/CodeGen/AMDGPU/captured-frame-index.ll @@ -123,10 +123,10 @@ ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen ; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}} -; GCN: buffer_store_dword [[FI1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}} -; GCN: buffer_store_dword [[FI2]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} 
define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 { %tmp0 = alloca float %tmp1 = alloca float @@ -150,7 +150,7 @@ ; GCN-DAG: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56 ; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]] -; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 { %tmp0 = alloca [4096 x i32] %tmp1 = alloca [4096 x i32] Index: test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll =================================================================== --- test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -42,7 +42,7 @@ ; OPT: br label ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32: -; CI: buffer_load_dword {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 +; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 define void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { entry: %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 Index: test/CodeGen/AMDGPU/cgp-addressing-modes.ll =================================================================== --- test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -40,7 +40,7 @@ ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset: ; GCN: s_and_saveexec_b64 -; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} +; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} ; GCN: {{^}}BB1_2: ; GCN: s_or_b64 exec define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { @@ -67,7 +67,7 @@ ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset: ; GCN: s_and_saveexec_b64 -; GCN: buffer_load_sbyte 
{{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}} +; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}} ; GCN: {{^}}BB2_2: ; GCN: s_or_b64 exec define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { @@ -94,7 +94,7 @@ ; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset: ; GCN: s_and_saveexec_b64 -; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} +; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}} ; GCN: {{^}}BB3_2: ; GCN: s_or_b64 exec define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { Index: test/CodeGen/AMDGPU/ctpop.ll =================================================================== --- test/CodeGen/AMDGPU/ctpop.ll +++ test/CodeGen/AMDGPU/ctpop.ll @@ -250,8 +250,8 @@ } ; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv: -; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}} -; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16 +; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}} +; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; GCN: buffer_store_dword [[RESULT]], Index: test/CodeGen/AMDGPU/fdiv.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fdiv.f64.ll +++ test/CodeGen/AMDGPU/fdiv.f64.ll @@ -4,8 +4,8 @@ ; COMMON-LABEL: {{^}}fdiv_f64: -; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 -; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], off, {{s\[[0-9]+:[0-9]+\]}}, 0 +; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], off, 
{{s\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]] ; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]] Index: test/CodeGen/AMDGPU/fmax3.f64.ll =================================================================== --- test/CodeGen/AMDGPU/fmax3.f64.ll +++ test/CodeGen/AMDGPU/fmax3.f64.ll @@ -4,9 +4,9 @@ declare double @llvm.maxnum.f64(double, double) nounwind readnone ; SI-LABEL: {{^}}test_fmax3_f64: -; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0{{$}} -; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:8 -; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:16 +; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}} +; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8 +; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16 ; SI: v_max_f64 [[REGA]], [[REGA]], [[REGB]] ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[REGA]], [[REGC]] ; SI: buffer_store_dwordx2 [[RESULT]], Index: test/CodeGen/AMDGPU/global_atomics.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics.ll +++ test/CodeGen/AMDGPU/global_atomics.ll @@ -3,7 +3,7 @@ ; FUNC-LABEL: {{^}}atomic_add_i32_offset: -; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -12,7 +12,7 @@ } ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset: -; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_add [[RET:v[0-9]+]], 
off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -48,7 +48,7 @@ } ; FUNC-LABEL: {{^}}atomic_add_i32: -; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst @@ -56,7 +56,7 @@ } ; FUNC-LABEL: {{^}}atomic_add_i32_ret: -; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -88,7 +88,7 @@ } ; FUNC-LABEL: {{^}}atomic_and_i32_offset: -; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -97,7 +97,7 @@ } ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset: -; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -132,7 +132,7 @@ } ; FUNC-LABEL: {{^}}atomic_and_i32: -; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst @@ -140,7 +140,7 @@ 
} ; FUNC-LABEL: {{^}}atomic_and_i32_ret: -; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -172,7 +172,7 @@ } ; FUNC-LABEL: {{^}}atomic_sub_i32_offset: -; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -181,7 +181,7 @@ } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset: -; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -216,7 +216,7 @@ } ; FUNC-LABEL: {{^}}atomic_sub_i32: -; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst @@ -224,7 +224,7 @@ } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret: -; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -256,7 +256,7 @@ } ; FUNC-LABEL: {{^}}atomic_max_i32_offset: -; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 
0 offset:16{{$}} define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -265,7 +265,7 @@ } ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset: -; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -300,7 +300,7 @@ } ; FUNC-LABEL: {{^}}atomic_max_i32: -; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst @@ -308,7 +308,7 @@ } ; FUNC-LABEL: {{^}}atomic_max_i32_ret: -; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -340,7 +340,7 @@ } ; FUNC-LABEL: {{^}}atomic_umax_i32_offset: -; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -349,7 +349,7 @@ } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset: -; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) 
{ entry: @@ -384,7 +384,7 @@ } ; FUNC-LABEL: {{^}}atomic_umax_i32: -; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst @@ -392,7 +392,7 @@ } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret: -; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -424,7 +424,7 @@ } ; FUNC-LABEL: {{^}}atomic_min_i32_offset: -; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -433,7 +433,7 @@ } ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset: -; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -468,7 +468,7 @@ } ; FUNC-LABEL: {{^}}atomic_min_i32: -; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst @@ -476,7 +476,7 @@ } ; FUNC-LABEL: {{^}}atomic_min_i32_ret: -; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_smin 
[[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -508,7 +508,7 @@ } ; FUNC-LABEL: {{^}}atomic_umin_i32_offset: -; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -517,7 +517,7 @@ } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset: -; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -552,7 +552,7 @@ } ; FUNC-LABEL: {{^}}atomic_umin_i32: -; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst @@ -560,7 +560,7 @@ } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret: -; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -592,7 +592,7 @@ } ; FUNC-LABEL: {{^}}atomic_or_i32_offset: -; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 
addrspace(1)* %out, i32 4 @@ -601,7 +601,7 @@ } ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset: -; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -636,7 +636,7 @@ } ; FUNC-LABEL: {{^}}atomic_or_i32: -; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst @@ -644,7 +644,7 @@ } ; FUNC-LABEL: {{^}}atomic_or_i32_ret: -; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -676,7 +676,7 @@ } ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset: -; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -685,7 +685,7 @@ } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -719,7 +719,7 @@ } ; FUNC-LABEL: {{^}}atomic_xchg_i32: -; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; 
GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst @@ -727,7 +727,7 @@ } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret: -; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -761,7 +761,7 @@ ; CMP_SWAP ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset: -; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -770,7 +770,7 @@ } ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset: -; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword v[[RET]] define void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { entry: @@ -806,7 +806,7 @@ } ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32: -; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) { entry: %0 = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst @@ -814,7 +814,7 @@ } ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret: -; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword v[[RET]] define void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) { entry: @@ -848,7 +848,7 @@ } ; FUNC-LABEL: {{^}}atomic_xor_i32_offset: -; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} +; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}} define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { entry: %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4 @@ -857,7 +857,7 @@ } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset: -; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -892,7 +892,7 @@ } ; FUNC-LABEL: {{^}}atomic_xor_i32: -; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { entry: %0 = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst @@ -900,7 +900,7 @@ } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret: -; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -933,7 +933,7 @@ ; ATOMIC_LOAD ; FUNC-LABEL: {{^}}atomic_load_i32_offset: -; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; VI: 
flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { @@ -945,7 +945,7 @@ } ; FUNC-LABEL: {{^}}atomic_load_i32: -; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc ; GCN: buffer_store_dword [[RET]] define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) { @@ -981,7 +981,7 @@ } ; FUNC-LABEL: {{^}}atomic_load_i64_offset: -; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) { @@ -993,7 +993,7 @@ } ; FUNC-LABEL: {{^}}atomic_load_i64: -; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) { @@ -1030,7 +1030,7 @@ ; ATOMIC_STORE ; FUNC-LABEL: {{^}}atomic_store_i32_offset: -; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} +; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) { entry: @@ -1040,7 +1040,7 @@ } ; FUNC-LABEL: {{^}}atomic_store_i32: -; SI: buffer_store_dword {{v[0-9]+}}, 
s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}} +; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}} ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}} define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) { entry: @@ -1070,7 +1070,7 @@ } ; FUNC-LABEL: {{^}}atomic_store_i64_offset: -; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}} define void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) { entry: @@ -1080,7 +1080,7 @@ } ; FUNC-LABEL: {{^}}atomic_store_i64: -; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc define void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) { entry: Index: test/CodeGen/AMDGPU/global_atomics_i64.ll =================================================================== --- test/CodeGen/AMDGPU/global_atomics_i64.ll +++ test/CodeGen/AMDGPU/global_atomics_i64.ll @@ -3,7 +3,7 @@ ; GCN-LABEL: {{^}}atomic_add_i64_offset: -; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -12,7 +12,7 @@ } ; GCN-LABEL: {{^}}atomic_add_i64_ret_offset: -; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] 
define void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -47,7 +47,7 @@ } ; GCN-LABEL: {{^}}atomic_add_i64: -; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst @@ -55,7 +55,7 @@ } ; GCN-LABEL: {{^}}atomic_add_i64_ret: -; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -87,7 +87,7 @@ } ; GCN-LABEL: {{^}}atomic_and_i64_offset: -; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -96,7 +96,7 @@ } ; GCN-LABEL: {{^}}atomic_and_i64_ret_offset: -; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -131,7 +131,7 @@ } ; GCN-LABEL: {{^}}atomic_and_i64: -; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile and 
i64 addrspace(1)* %out, i64 %in seq_cst @@ -139,7 +139,7 @@ } ; GCN-LABEL: {{^}}atomic_and_i64_ret: -; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -171,7 +171,7 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64_offset: -; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -180,7 +180,7 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset: -; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -215,7 +215,7 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64: -; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst @@ -223,7 +223,7 @@ } ; GCN-LABEL: {{^}}atomic_sub_i64_ret: -; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -255,7 
+255,7 @@ } ; GCN-LABEL: {{^}}atomic_max_i64_offset: -; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -264,7 +264,7 @@ } ; GCN-LABEL: {{^}}atomic_max_i64_ret_offset: -; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -299,7 +299,7 @@ } ; GCN-LABEL: {{^}}atomic_max_i64: -; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst @@ -307,7 +307,7 @@ } ; GCN-LABEL: {{^}}atomic_max_i64_ret: -; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -339,7 +339,7 @@ } ; GCN-LABEL: {{^}}atomic_umax_i64_offset: -; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_umax_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -348,7 +348,7 @@ } ; GCN-LABEL: 
{{^}}atomic_umax_i64_ret_offset: -; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -383,7 +383,7 @@ } ; GCN-LABEL: {{^}}atomic_umax_i64: -; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst @@ -391,7 +391,7 @@ } ; GCN-LABEL: {{^}}atomic_umax_i64_ret: -; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -423,7 +423,7 @@ } ; GCN-LABEL: {{^}}atomic_min_i64_offset: -; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -432,7 +432,7 @@ } ; GCN-LABEL: {{^}}atomic_min_i64_ret_offset: -; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -467,7 +467,7 @@ } ; 
GCN-LABEL: {{^}}atomic_min_i64: -; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst @@ -475,7 +475,7 @@ } ; GCN-LABEL: {{^}}atomic_min_i64_ret: -; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -507,7 +507,7 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64_offset: -; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -516,7 +516,7 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset: -; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -551,7 +551,7 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64: -; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst @@ -559,7 +559,7 @@ } ; GCN-LABEL: {{^}}atomic_umin_i64_ret: -; 
CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -591,7 +591,7 @@ } ; GCN-LABEL: {{^}}atomic_or_i64_offset: -; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -600,7 +600,7 @@ } ; GCN-LABEL: {{^}}atomic_or_i64_ret_offset: -; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -635,7 +635,7 @@ } ; GCN-LABEL: {{^}}atomic_or_i64: -; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_or_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst @@ -643,7 +643,7 @@ } ; GCN-LABEL: {{^}}atomic_or_i64_ret: -; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_or_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -675,7 +675,7 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64_offset: -; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, 
s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -684,7 +684,7 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset: -; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -718,7 +718,7 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64: -; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xchg_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst @@ -726,7 +726,7 @@ } ; GCN-LABEL: {{^}}atomic_xchg_i64_ret: -; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_xchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -758,7 +758,7 @@ } ; GCN-LABEL: {{^}}atomic_xor_i64_offset: -; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} +; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}} define void @atomic_xor_i64_offset(i64 addrspace(1)* %out, i64 %in) { entry: %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4 @@ -767,7 +767,7 @@ } ; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset: -; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} +; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}} ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_xor_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: @@ -802,7 +802,7 @@ } ; GCN-LABEL: {{^}}atomic_xor_i64: -; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} +; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} define void @atomic_xor_i64(i64 addrspace(1)* %out, i64 %in) { entry: %tmp0 = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst @@ -810,7 +810,7 @@ } ; GCN-LABEL: {{^}}atomic_xor_i64_ret: -; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc +; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc ; GCN: buffer_store_dwordx2 [[RET]] define void @atomic_xor_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) { entry: Index: test/CodeGen/AMDGPU/half.ll =================================================================== --- test/CodeGen/AMDGPU/half.ll +++ test/CodeGen/AMDGPU/half.ll @@ -13,10 +13,10 @@ } ; GCN-LABEL: {{^}}load_v2f16_arg: -; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 -; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46 -; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 +; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46 +; GCN-DAG: buffer_store_short [[V0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_short [[V1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} ; GCN: s_endpgm define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, 
<2 x half> %arg) #0 { store <2 x half> %arg, <2 x half> addrspace(1)* %out @@ -280,8 +280,8 @@ } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32: -; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}} @@ -378,8 +378,8 @@ } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64: -; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]] @@ -407,8 +407,8 @@ ; GCN: v_cvt_f64_f32_e32 ; GCN-NOT: v_cvt_f64_f32_e32 -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 ; GCN: s_endpgm define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 { %val = load <3 x half>, <3 x half> addrspace(1)* %in Index: test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll 
=================================================================== --- test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll +++ test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll @@ -21,7 +21,7 @@ ; GCN-LABEL: {{^}}test_merge_store_constant_i16_invariant_constant_pointer_load: ; GCN: s_load_dwordx2 s{{\[}}[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]{{\]}} ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b -; GCN: buffer_store_dword [[K]], s{{\[}}[[SPTR_LO]]: +; GCN: buffer_store_dword [[K]], off, s{{\[}}[[SPTR_LO]]: define void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(2)* dereferenceable(4096) nonnull %in) #0 { %ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(2)* %in, !invariant.load !0 %ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1 @@ -32,4 +32,4 @@ !0 = !{} -attributes #0 = { nounwind } \ No newline at end of file +attributes #0 = { nounwind } Index: test/CodeGen/AMDGPU/llvm.SI.load.dword.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.SI.load.dword.ll +++ test/CodeGen/AMDGPU/llvm.SI.load.dword.ll @@ -7,7 +7,7 @@ ; FIXME: Out of bounds immediate offset crashes ; CHECK-LABEL: {{^}}main: -; CHECK: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc +; CHECK: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc Index: test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -49,7 +49,7 @@ ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 -; 
GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} +; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42) store i32 %result, i32 addrspace(1)* %out @@ -58,7 +58,7 @@ ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 -; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}} +; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}} define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42) @@ -67,7 +67,7 @@ } ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32: -; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind { %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42) ret void @@ -75,7 +75,7 @@ ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32_offset: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 -; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42) @@ -166,7 +166,7 @@ ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} +; GCN: 
buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42) store i64 %result, i64 addrspace(1)* %out @@ -176,7 +176,7 @@ ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}} +; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}} define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42) @@ -187,7 +187,7 @@ ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind { %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42) ret void @@ -196,7 +196,7 @@ ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64_offset: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}} +; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}} define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind { %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 %result = call i64 
@llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42) Index: test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -49,7 +49,7 @@ ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 -; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} +; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} define void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42) store i32 %result, i32 addrspace(1)* %out @@ -58,7 +58,7 @@ ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 -; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}} +; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}} define void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42) @@ -67,7 +67,7 @@ } ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32: -; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind { %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42) ret void @@ -75,7 +75,7 @@ ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32_offset: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 -; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} define void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { %gep = getelementptr 
i32, i32 addrspace(1)* %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42) @@ -166,7 +166,7 @@ ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} +; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}} define void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42) store i64 %result, i64 addrspace(1)* %out @@ -176,7 +176,7 @@ ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}} +; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}} define void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42) @@ -187,7 +187,7 @@ ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i64: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} define void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind { %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42) ret void @@ -196,7 +196,7 @@ ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i64_offset: ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} -; 
GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}} +; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}} define void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind { %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42) Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -2,7 +2,7 @@ ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}test1: -;CHECK: buffer_atomic_swap v0, s[0:3], 0 glc +;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 idxen glc ;CHECK: s_waitcnt vmcnt(0) @@ -13,9 +13,9 @@ ;CHECK: buffer_atomic_swap v0, v2, s[0:3], 0 offen offset:42 glc ;CHECK-DAG: s_waitcnt vmcnt(0) ;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff -;CHECK: buffer_atomic_swap v0, s[0:3], [[SOFS]] offset:1 glc +;CHECK: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:1 glc ;CHECK: s_waitcnt vmcnt(0) -;CHECK: buffer_atomic_swap v0, s[0:3], 0{{$}} +;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}} define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) { main_body: %o1 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0) @@ -68,7 +68,7 @@ ; create copies which we don't bother to track here. 
; ;CHECK-LABEL: {{^}}test3: -;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 glc +;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc ;CHECK: s_waitcnt vmcnt(0) ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 idxen glc ;CHECK: s_waitcnt vmcnt(0) @@ -79,7 +79,7 @@ ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen offset:42 glc ;CHECK-DAG: s_waitcnt vmcnt(0) ;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff -;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[SOFS]] offset:1 glc +;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:1 glc define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) { main_body: %o1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll @@ -2,9 +2,9 @@ ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_load: -;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 -;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc -;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc +;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 +;CHECK: buffer_load_format_xyzw v[4:7], off, s[0:3], 0 glc +;CHECK: buffer_load_format_xyzw v[8:11], off, s[0:3], 0 slc ;CHECK: s_waitcnt define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) { main_body: @@ -18,7 +18,7 @@ } ;CHECK-LABEL: {{^}}buffer_load_immoffs: -;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42 +;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 offset:42 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) { main_body: 
@@ -27,11 +27,11 @@ } ;CHECK-LABEL: {{^}}buffer_load_immoffs_large: -;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 61 offset:4095 +;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 61 offset:4095 ;CHECK-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff -;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS1]] offset:4093 +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093 ;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff -;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1 +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) { main_body: @@ -45,9 +45,9 @@ ;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse: ;CHECK: s_movk_i32 [[OFS:s[0-9]+]], 0xfff -;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:65 +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:65 ;CHECK-NOT: s_mov -;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81 +;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:81 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) { main_body: @@ -105,7 +105,7 @@ } ;CHECK-LABEL: {{^}}buffer_load_x: -;CHECK: buffer_load_format_x v0, s[0:3], 0 +;CHECK: buffer_load_format_x v0, off, s[0:3], 0 ;CHECK: s_waitcnt define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) { main_body: @@ -114,7 +114,7 @@ } ;CHECK-LABEL: {{^}}buffer_load_xy: -;CHECK: buffer_load_format_xy v[0:1], s[0:3], 0 +;CHECK: buffer_load_format_xy v[0:1], off, s[0:3], 0 ;CHECK: s_waitcnt define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) { main_body: Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll =================================================================== --- 
test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -2,9 +2,9 @@ ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_load: -;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], 0 -;CHECK: buffer_load_dwordx4 v[4:7], s[0:3], 0 glc -;CHECK: buffer_load_dwordx4 v[8:11], s[0:3], 0 slc +;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 +;CHECK: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc +;CHECK: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc ;CHECK: s_waitcnt define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) { main_body: @@ -18,7 +18,7 @@ } ;CHECK-LABEL: {{^}}buffer_load_immoffs: -;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], 0 offset:42 +;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:42 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) { main_body: @@ -28,7 +28,7 @@ ;CHECK-LABEL: {{^}}buffer_load_immoffs_large: ;CHECK: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1fff -;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], [[OFFSET]] offset:1 +;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:1 ;CHECK: s_waitcnt define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) { main_body: Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll @@ -2,9 +2,9 @@ ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_store: -;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 -;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc -;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc +;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0 +;CHECK: buffer_store_format_xyzw v[4:7], off, s[0:3], 0 glc +;CHECK: buffer_store_format_xyzw v[8:11], off, s[0:3], 0 
slc define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) { main_body: call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0) @@ -14,7 +14,7 @@ } ;CHECK-LABEL: {{^}}buffer_store_immoffs: -;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42 +;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0 offset:42 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) { main_body: call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0) Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll @@ -2,9 +2,9 @@ ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s ;CHECK-LABEL: {{^}}buffer_store: -;CHECK: buffer_store_dwordx4 v[0:3], s[0:3], 0 -;CHECK: buffer_store_dwordx4 v[4:7], s[0:3], 0 glc -;CHECK: buffer_store_dwordx4 v[8:11], s[0:3], 0 slc +;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc +;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) { main_body: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0) @@ -14,7 +14,7 @@ } ;CHECK-LABEL: {{^}}buffer_store_immoffs: -;CHECK: buffer_store_dwordx4 v[0:3], s[0:3], 0 offset:42 +;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) { main_body: call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0) Index: test/CodeGen/AMDGPU/merge-stores.ll =================================================================== --- test/CodeGen/AMDGPU/merge-stores.ll +++ 
test/CodeGen/AMDGPU/merge-stores.ll @@ -231,8 +231,8 @@ } ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base: -; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; GCN: buffer_store_dwordx2 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3 @@ -334,8 +334,8 @@ } ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base: -; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 -; GCN: buffer_store_dwordx4 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 +; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 +; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12 @@ -640,13 +640,13 @@ ; GCN-LABEL: {{^}}copy_v3i32_align4: ; GCN-NOT: SCRATCH_RSRC_DWORD -; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-NOT: offen ; GCN: s_waitcnt vmcnt ; GCN-NOT: offen -; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_dword 
v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; GCN: ScratchSize: 0{{$}} define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { @@ -657,13 +657,13 @@ ; GCN-LABEL: {{^}}copy_v3i64_align4: ; GCN-NOT: SCRATCH_RSRC_DWORD -; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN-NOT: offen ; GCN: s_waitcnt vmcnt ; GCN-NOT: offen -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 @@ -673,13 +673,13 @@ ; GCN-LABEL: {{^}}copy_v3f32_align4: ; GCN-NOT: SCRATCH_RSRC_DWORD -; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 -; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} ; GCN-NOT: offen ; GCN: s_waitcnt vmcnt ; GCN-NOT: offen -; GCN-DAG: buffer_store_dwordx2 
v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 ; GCN: ScratchSize: 0{{$}} define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 @@ -690,13 +690,13 @@ ; GCN-LABEL: {{^}}copy_v3f64_align4: ; GCN-NOT: SCRATCH_RSRC_DWORD -; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN-NOT: offen ; GCN: s_waitcnt vmcnt ; GCN-NOT: offen -; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} ; GCN: ScratchSize: 0{{$}} define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 Index: test/CodeGen/AMDGPU/mubuf.ll =================================================================== --- test/CodeGen/AMDGPU/mubuf.ll +++ test/CodeGen/AMDGPU/mubuf.ll @@ -8,7 +8,7 @@ ; MUBUF load with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: {{^}}mubuf_load0: -; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; 
encoding: [0x04,0x00,0x30,0xe0 +; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1 @@ -19,7 +19,7 @@ ; MUBUF load with the largest possible immediate offset ; CHECK-LABEL: {{^}}mubuf_load1: -; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0 +; CHECK: buffer_load_ubyte v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095 @@ -31,7 +31,7 @@ ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: {{^}}mubuf_load2: ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 -; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0 +; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024 @@ -92,7 +92,7 @@ ; MUBUF store with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: {{^}}mubuf_store0: -; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0 +; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0 define void @mubuf_store0(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1 @@ -102,7 +102,7 @@ ; MUBUF store with the largest possible immediate offset ; CHECK-LABEL: {{^}}mubuf_store1: -; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0 +; CHECK: buffer_store_byte v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 
offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0 define void @mubuf_store1(i8 addrspace(1)* %out) { entry: @@ -114,7 +114,7 @@ ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: {{^}}mubuf_store2: ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000 -; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0 +; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0 define void @mubuf_store2(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024 @@ -135,14 +135,14 @@ } ; CHECK-LABEL: {{^}}store_sgpr_ptr: -; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 +; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 { store i32 99, i32 addrspace(1)* %out, align 4 ret void } ; CHECK-LABEL: {{^}}store_sgpr_ptr_offset: -; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40 +; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40 define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 { %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10 store i32 99, i32 addrspace(1)* %out.gep, align 4 @@ -151,7 +151,7 @@ ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset: ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 -; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] +; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 { %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768 store i32 99, i32 addrspace(1)* %out.gep, align 4 @@ -160,7 +160,7 @@ ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic: ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000 -; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] +; CHECK: buffer_atomic_add 
v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]] define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 { %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768 %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst Index: test/CodeGen/AMDGPU/reduce-load-width-alignment.ll =================================================================== --- test/CodeGen/AMDGPU/reduce-load-width-alignment.ll +++ test/CodeGen/AMDGPU/reduce-load-width-alignment.ll @@ -25,7 +25,7 @@ } ; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1: -; GCN: buffer_load_dword [[VAL:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 +; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; GCN: buffer_store_dword [[VAL]] define void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 { %a = load i64, i64 addrspace(1)* %in, align 4 Index: test/CodeGen/AMDGPU/schedule-global-loads.ll =================================================================== --- test/CodeGen/AMDGPU/schedule-global-loads.ll +++ test/CodeGen/AMDGPU/schedule-global-loads.ll @@ -7,8 +7,8 @@ ; ordering the loads so that the lower address loads come first. 
; FUNC-LABEL: {{^}}cluster_global_arg_loads: -; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} -; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 +; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; SI: buffer_store_dword [[REG0]] ; SI: buffer_store_dword [[REG1]] define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 { Index: test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll =================================================================== --- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll +++ test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll @@ -184,12 +184,12 @@ } ; FUNC-LABEL: @reorder_global_offsets -; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 -; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 -; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 -; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 -; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 -; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 +; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 +; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12 +; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400 +; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404 ; CI: buffer_store_dword ; CI: s_endpgm define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 { 
Index: test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll =================================================================== --- test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll +++ test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll @@ -18,7 +18,7 @@ ; CHECK: s_or_b64 exec, exec, s[2:3] ; CHECK-NEXT: s_mov_b32 s7, 0xf000 ; CHECK-NEXT: s_mov_b32 s6, -1 -; CHECK-NEXT: buffer_store_dword v1, s[4:7], 0 +; CHECK-NEXT: buffer_store_dword v1, off, s[4:7], 0 ; CHECK-NEXT: s_endpgm define void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind { entry: Index: test/CodeGen/AMDGPU/v_mac.ll =================================================================== --- test/CodeGen/AMDGPU/v_mac.ll +++ test/CodeGen/AMDGPU/v_mac.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s ; GCN-LABEL: {{^}}mac_vvv: -; GCN: buffer_load_dword [[A:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:4 -; GCN: buffer_load_dword [[C:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:8 +; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}} +; GCN: buffer_load_dword [[B:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 +; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8 ; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]] ; GCN: buffer_store_dword [[C]] define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) { Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll @@ -27,7 +27,7 @@ ; VIMESA-NEXT: s_mov_b32 s15, 0x980000 -; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill +; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte 
Folded Spill ; GCN: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}} ; GCN: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}} Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll =================================================================== --- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll +++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -19,8 +19,8 @@ ; VI-NEXT: s_mov_b32 s15, 0x980000 ; s12 is offset user SGPR -; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill -; GCN: buffer_load_dword v{{[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload +; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill +; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload ; GCN: NumVgprs: 256 ; GCN: ScratchSize: 1024 Index: test/MC/AMDGPU/ds-err.s =================================================================== --- test/MC/AMDGPU/ds-err.s +++ test/MC/AMDGPU/ds-err.s @@ -2,15 +2,15 @@ // RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s // offset too big -// CHECK: invalid operand for instruction +// CHECK: error: invalid operand for instruction ds_add_u32 v2, v4 offset:1000000000 // offset0 twice -// CHECK: error: not a valid operand. +// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset0:4 offset0:8 // offset1 twice -// CHECK: error: not a valid operand. 
+// CHECK: error: invalid operand for instruction ds_write2_b32 v2, v4, v6 offset1:4 offset1:8 // offset0 too big Index: test/MC/AMDGPU/mubuf.s =================================================================== --- test/MC/AMDGPU/mubuf.s +++ test/MC/AMDGPU/mubuf.s @@ -14,33 +14,33 @@ // load - immediate offset only //===----------------------------------------------------------------------===// -buffer_load_dword v1, s[4:7], s1 -// SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dword v1, off, s[4:7], s1 +// SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] -buffer_load_dword v1, s[4:7], s1 offset:4 -// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dword v1, off, s[4:7], s1 offset:4 +// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] -buffer_load_dword v1, s[4:7], s1 offset:4 glc -// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dword v1, off, s[4:7], s1 offset:4 glc +// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01] -buffer_load_dword v1, s[4:7], s1 offset:4 slc -// SICI: 
buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dword v1, off, s[4:7], s1 offset:4 slc +// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01] -buffer_load_dword v1, s[4:7], s1 offset:4 tfe -// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01] +buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe +// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01] -buffer_load_dword v1, s[4:7], s1 glc tfe -// SICI: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01] +buffer_load_dword v1, off, s[4:7], s1 glc tfe +// SICI: buffer_load_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01] -buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe -// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01] +buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe +// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: 
[0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01] //===----------------------------------------------------------------------===// @@ -175,33 +175,33 @@ // store - immediate offset only //===----------------------------------------------------------------------===// -buffer_store_dword v1, s[4:7], s1 -// SICI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +buffer_store_dword v1, off, s[4:7], s1 +// SICI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] -buffer_store_dword v1, s[4:7], s1 offset:4 -// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +buffer_store_dword v1, off, s[4:7], s1 offset:4 +// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] -buffer_store_dword v1, s[4:7], s1 offset:4 glc -// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01] +buffer_store_dword v1, off, s[4:7], s1 offset:4 glc +// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01] -buffer_store_dword v1, s[4:7], s1 offset:4 slc -// SICI: 
buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01] +buffer_store_dword v1, off, s[4:7], s1 offset:4 slc +// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01] -buffer_store_dword v1, s[4:7], s1 offset:4 tfe -// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01] +buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe +// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01] -buffer_store_dword v1, s[4:7], s1 glc tfe -// SICI: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01] +buffer_store_dword v1, off, s[4:7], s1 glc tfe +// SICI: buffer_store_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01] -buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe -// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01] +buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe +// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: 
[0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01] //===----------------------------------------------------------------------===// // store - vgpr offset @@ -335,85 +335,85 @@ // Instructions //===----------------------------------------------------------------------===// -buffer_load_format_x v1, s[4:7], s1 -// SICI: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01] +buffer_load_format_x v1, off, s[4:7], s1 +// SICI: buffer_load_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01] -buffer_load_format_xy v[1:2], s[4:7], s1 -// SICI: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01] +buffer_load_format_xy v[1:2], off, s[4:7], s1 +// SICI: buffer_load_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01] -buffer_load_format_xyz v[1:3], s[4:7], s1 -// SICI: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01] +buffer_load_format_xyz v[1:3], off, s[4:7], s1 +// SICI: buffer_load_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01] -buffer_load_format_xyzw v[1:4], s[4:7], s1 -// SICI: buffer_load_format_xyzw v[1:4], 
s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01] +buffer_load_format_xyzw v[1:4], off, s[4:7], s1 +// SICI: buffer_load_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01] -buffer_store_format_x v1, s[4:7], s1 -// SICI: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01] +buffer_store_format_x v1, off, s[4:7], s1 +// SICI: buffer_store_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01] -buffer_store_format_xy v[1:2], s[4:7], s1 -// SICI: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01] +buffer_store_format_xy v[1:2], off, s[4:7], s1 +// SICI: buffer_store_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01] -buffer_store_format_xyz v[1:3], s[4:7], s1 -// SICI: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01] +buffer_store_format_xyz v[1:3], off, s[4:7], s1 +// SICI: buffer_store_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01] 
-buffer_store_format_xyzw v[1:4], s[4:7], s1 -// SICI: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01] +buffer_store_format_xyzw v[1:4], off, s[4:7], s1 +// SICI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01] -buffer_load_ubyte v1, s[4:7], s1 -// SICI: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01] +buffer_load_ubyte v1, off, s[4:7], s1 +// SICI: buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01] -buffer_load_sbyte v1, s[4:7], s1 -// SICI: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01] +buffer_load_sbyte v1, off, s[4:7], s1 +// SICI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01] -buffer_load_ushort v1, s[4:7], s1 -// SICI: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01] +buffer_load_ushort v1, off, s[4:7], s1 +// SICI: buffer_load_ushort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_ushort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01] -buffer_load_sshort v1, s[4:7], s1 -// SICI: 
buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01] +buffer_load_sshort v1, off, s[4:7], s1 +// SICI: buffer_load_sshort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_sshort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01] -buffer_load_dword v1, s[4:7], s1 -// SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dword v1, off, s[4:7], s1 +// SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] -buffer_load_dwordx2 v[1:2], s[4:7], s1 -// SICI: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dwordx2 v[1:2], off, s[4:7], s1 +// SICI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01] -buffer_load_dwordx4 v[1:4], s[4:7], s1 -// SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dwordx4 v[1:4], off, s[4:7], s1 +// SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] -buffer_store_byte v1, s[4:7], s1 -// SICI: buffer_store_byte v1, s[4:7], s1 ; encoding: 
[0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01] +buffer_store_byte v1, off, s[4:7], s1 +// SICI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01] -buffer_store_short v1, s[4:7], s1 -// SICI: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01] +buffer_store_short v1, off, s[4:7], s1 +// SICI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01] -buffer_store_dword v1 s[4:7], s1 -// SICI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +buffer_store_dword v1, off, s[4:7], s1 +// SICI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] -buffer_store_dwordx2 v[1:2], s[4:7], s1 -// SICI: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01] +buffer_store_dwordx2 v[1:2], off, s[4:7], s1 +// SICI: buffer_store_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01] -buffer_store_dwordx4 v[1:4], s[4:7], s1 -// SICI: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01] -// VI: 
buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01] +buffer_store_dwordx4 v[1:4], off, s[4:7], s1 +// SICI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01] //===----------------------------------------------------------------------===// // Cache invalidation Index: test/MC/AMDGPU/reg-syntax-extra.s =================================================================== --- test/MC/AMDGPU/reg-syntax-extra.s +++ test/MC/AMDGPU/reg-syntax-extra.s @@ -50,6 +50,6 @@ // SICI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e] // VI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x4b,0x02,0x7e] -buffer_load_dwordx4 [v1,v2,v3,v4], [s4,s5,s6,s7], s1 -// SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] -// VI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] +buffer_load_dwordx4 [v1,v2,v3,v4], off, [s4,s5,s6,s7], s1 +// SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01] +// VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] Index: test/MC/Disassembler/AMDGPU/mubuf_vi.txt =================================================================== --- test/MC/Disassembler/AMDGPU/mubuf_vi.txt +++ test/MC/Disassembler/AMDGPU/mubuf_vi.txt @@ -1,24 +1,24 @@ # RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI -# VI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x50 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] 
+# VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] 0x04 0x00 0x50 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01] 0x04 0x40 0x50 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01] 0x04 0x00 0x52 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01] +# VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01] 0x04 0x00 0x50 0xe0 0x00 0x01 0x81 0x01 -# VI: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01] +# VI: buffer_load_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01] 0x00 0x40 0x50 0xe0 0x00 0x01 0x81 0x01 -# VI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01] +# VI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01] 0x04 0x40 0x52 0xe0 0x00 0x01 0x81 0x01 # VI: buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x50,0xe0,0x02,0x01,0x01,0x01] @@ -84,25 +84,25 @@ # VI: buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x81,0x01] 0x04 0x70 0x52 0xe0 0x02 0x01 0x81 0x01 -# VI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x70 0xe0 0x00 0x01 0x01 0x01 -# VI: 
buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] 0x04 0x00 0x70 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01] 0x04 0x40 0x70 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01] 0x04 0x00 0x72 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01] 0x04 0x00 0x70 0xe0 0x00 0x01 0x81 0x01 -# VI: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01] 0x00 0x40 0x70 0xe0 0x00 0x01 0x81 0x01 -# VI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01] 0x04 0x40 0x72 0xe0 0x00 0x01 0x81 0x01 # VI: buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01] @@ -168,64 +168,64 @@ # VI: buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x81,0x01] 0x04 0x70 0x72 0xe0 0x02 0x01 0x81 0x01 -# VI: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_format_x v1, off, s[4:7], 
s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x00 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x04 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x08 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x0c 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x10 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x14 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x18 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x1c 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_ubyte v1, off, s[4:7], s1 ; 
encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x40 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x44 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_ushort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x48 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_sshort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x4c 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x50 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x54 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x5c 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x60 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x68 0xe0 0x00 0x01 0x01 0x01 -# VI: 
buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x70 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x74 0xe0 0x00 0x01 0x01 0x01 -# VI: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01] +# VI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01] 0x00 0x00 0x7c 0xe0 0x00 0x01 0x01 0x01 # VI: buffer_wbinvl1 ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00]