Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -61,15 +61,18 @@
 
   enum ImmTy {
     ImmTyNone,
-    ImmTyDSOffset0,
-    ImmTyDSOffset1,
     ImmTyGDS,
+    ImmTyOffen,
+    ImmTyIdxen,
+    ImmTyAddr64,
     ImmTyOffset,
+    ImmTyOffset0,
+    ImmTyOffset1,
     ImmTyGLC,
     ImmTySLC,
     ImmTyTFE,
-    ImmTyClamp,
-    ImmTyOMod,
+    ImmTyClampSI,
+    ImmTyOModSI,
     ImmTyDppCtrl,
     ImmTyDppRowMask,
     ImmTyDppBankMask,
@@ -149,13 +152,6 @@
     }
   }
 
-  bool defaultTokenHasSuffix() const {
-    StringRef Token(Tok.Data, Tok.Length);
-
-    return Token.endswith("_e32") || Token.endswith("_e64") ||
-      Token.endswith("_dpp");
-  }
-
   bool isToken() const override {
     return Kind == Token;
   }
@@ -178,16 +174,6 @@
            F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0);
   }
 
-  bool isDSOffset0() const {
-    assert(isImm());
-    return Imm.Type == ImmTyDSOffset0;
-  }
-
-  bool isDSOffset1() const {
-    assert(isImm());
-    return Imm.Type == ImmTyDSOffset1;
-  }
-
   int64_t getImm() const {
     return Imm.Val;
   }
@@ -213,12 +199,12 @@
     return isImm() && Imm.Type == ImmT;
   }
 
-  bool isClamp() const {
-    return isImmTy(ImmTyClamp);
+  bool isClampSI() const {
+    return isImmTy(ImmTyClampSI);
   }
 
-  bool isOMod() const {
-    return isImmTy(ImmTyOMod);
+  bool isOModSI() const {
+    return isImmTy(ImmTyOModSI);
   }
 
   bool isImmModifier() const {
@@ -235,9 +221,15 @@
   bool isLWE() const { return isImmTy(ImmTyLWE); }
 
   bool isMod() const {
-    return isClamp() || isOMod();
+    return isClampSI() || isOModSI();
   }
 
+  bool isOffen() const { return isImmTy(ImmTyOffen); }
+  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
+  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
+  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
+  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } // offset0 is 8 bits wide, same range as offset1
+  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
   bool isGDS() const { return isImmTy(ImmTyGDS); }
   bool isGLC() const { return isImmTy(ImmTyGLC); }
   bool isSLC() const { return isImmTy(ImmTySLC); }
@@ -347,16 +339,47 @@
     return EndLoc;
   }
 
+  void printImmTy(raw_ostream& OS, ImmTy Type) const {
+    switch (Type) {
+    case ImmTyNone: OS << "None"; break;
+    case ImmTyGDS: OS << "GDS"; break;
+    case ImmTyOffen: OS << "Offen"; break;
+    case ImmTyIdxen: OS << "Idxen"; break;
+    case ImmTyAddr64: OS << "Addr64"; break;
+    case ImmTyOffset: OS << "Offset"; break;
+    case ImmTyOffset0: OS << "Offset0"; break;
+    case ImmTyOffset1: OS << "Offset1"; break;
+    case ImmTyGLC: OS << "GLC"; break;
+    case ImmTySLC: OS << "SLC"; break;
+    case ImmTyTFE: OS << "TFE"; break;
+    case ImmTyClampSI: OS << "ClampSI"; break;
+    case ImmTyOModSI: OS << "OModSI"; break;
+    case ImmTyDppCtrl: OS << "DppCtrl"; break;
+    case ImmTyDppRowMask: OS << "DppRowMask"; break;
+    case ImmTyDppBankMask: OS << "DppBankMask"; break;
+    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
+    case ImmTySdwaSel: OS << "SdwaSel"; break;
+    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
+    case ImmTyDMask: OS << "DMask"; break;
+    case ImmTyUNorm: OS << "UNorm"; break;
+    case ImmTyDA: OS << "DA"; break;
+    case ImmTyR128: OS << "R128"; break;
+    case ImmTyLWE: OS << "LWE"; break;
+    case ImmTyHwreg: OS << "Hwreg"; break;
+    }
+  }
+
   void print(raw_ostream &OS) const override {
     switch (Kind) {
     case Register:
       OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>';
       break;
     case Immediate:
-      if (Imm.Type != AMDGPUOperand::ImmTyNone)
-        OS << getImm();
-      else
-        OS << '<' << getImm() << " mods: " << Imm.Modifiers << '>';
+      OS << '<' << getImm();
+      if (getImmTy() != ImmTyNone) {
+        OS << " type: "; printImmTy(OS, getImmTy());
+      }
+      OS << " mods: " << Imm.Modifiers << '>';
       break;
     case Token:
       OS << '\'' << getToken() << '\'';
@@ -414,8 +437,6 @@
     return Op;
   }
 
-  bool isDSOffset() const;
-  bool isDSOffset01() const;
   bool isSWaitCnt() const;
   bool isHwreg() const;
   bool isMubufOffset() const;
@@ -521,43 +542,55 @@
                         SMLoc NameLoc, OperandVector &Operands) override;
 
   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int,
-                                          int64_t Default = 0);
+                                          int64_t Default = 0, bool AddDefault = false);
   OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
                                           OperandVector &Operands,
-                                          enum AMDGPUOperand::ImmTy ImmTy =
-                                                      AMDGPUOperand::ImmTyNone);
+                                          enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+                                          int64_t Default = 0, bool AddDefault = false,
+                                          bool (*ConvertResult)(int64_t&) = nullptr); // optional post-parse fixup; a false return fails the parse
   OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
                                      enum AMDGPUOperand::ImmTy ImmTy =
-                                                      AMDGPUOperand::ImmTyNone);
+                                                      AMDGPUOperand::ImmTyNone,
+                                     bool AddDefault = false);
   OperandMatchResultTy parseOptionalOps(
                                    const ArrayRef<OptionalOperand> &OptionalOps,
                                    OperandVector &Operands);
   OperandMatchResultTy parseStringWithPrefix(const char *Prefix, StringRef &Value);
 
+  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands, const OptionalOperand& Op, bool AddDefault);
+  OperandMatchResultTy parseAMDGPUOperand(OperandVector &Operands, StringRef Name);
 
   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
   void cvtDS(MCInst &Inst, const OperandVector &Operands);
-  OperandMatchResultTy parseDSOptionalOps(OperandVector &Operands);
-  OperandMatchResultTy parseDSOff01OptionalOps(OperandVector &Operands);
-  OperandMatchResultTy parseDSOffsetOptional(OperandVector &Operands);
 
   bool parseCnt(int64_t &IntVal);
   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
-  bool parseHwreg(int64_t &HwRegCode, int64_t &Offset, int64_t &Width);
-  OperandMatchResultTy parseHwregOp(OperandVector &Operands);
+  bool parseHwregOperand(int64_t &HwRegCode, int64_t &Offset, int64_t &Width);
+  OperandMatchResultTy parseHwreg(OperandVector &Operands);
   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
 
-  OperandMatchResultTy parseFlatOptionalOps(OperandVector &Operands);
-  OperandMatchResultTy parseFlatAtomicOptionalOps(OperandVector &Operands);
   void cvtFlat(MCInst &Inst, const OperandVector &Operands);
   void cvtFlatAtomic(MCInst &Inst, const OperandVector &Operands);
 
   void cvtMubuf(MCInst &Inst, const OperandVector &Operands);
-  OperandMatchResultTy parseOffset(OperandVector &Operands);
-  OperandMatchResultTy parseMubufOptionalOps(OperandVector &Operands);
-  OperandMatchResultTy parseGLC(OperandVector &Operands);
-  OperandMatchResultTy parseSLC(OperandVector &Operands);
-  OperandMatchResultTy parseTFE(OperandVector &Operands);
+  OperandMatchResultTy parseOModSI(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "omod"); }
+  OperandMatchResultTy parseClampSI(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "clamp"); }
+  OperandMatchResultTy parseSMRDOffset(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "smrd_offset"); }
+  OperandMatchResultTy parseSMRDLiteralOffset(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "smrd_literal_offset"); }
+  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "dpp_ctrl"); }
+  OperandMatchResultTy parseRowMask(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "row_mask"); }
+  OperandMatchResultTy parseBankMask(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "bank_mask"); }
+  OperandMatchResultTy parseBoundCtrl(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "bound_ctrl"); }
+  OperandMatchResultTy parseOffen(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "offen"); }
+  OperandMatchResultTy parseIdxen(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "idxen"); }
+  OperandMatchResultTy parseAddr64(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "addr64"); }
+  OperandMatchResultTy parseOffset(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "offset"); }
+  OperandMatchResultTy parseOffset0(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "offset0"); }
+  OperandMatchResultTy parseOffset1(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "offset1"); }
+  OperandMatchResultTy parseGLC(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "glc"); }
+  OperandMatchResultTy parseSLC(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "slc"); }
+  OperandMatchResultTy parseTFE(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "tfe"); }
+  OperandMatchResultTy parseGDS(OperandVector &Operands) { return parseAMDGPUOperand(Operands, "gds"); }
 
   OperandMatchResultTy parseDMask(OperandVector &Operands);
   OperandMatchResultTy parseUNorm(OperandVector &Operands);
@@ -565,6 +598,8 @@
   OperandMatchResultTy parseR128(OperandVector &Operands);
   OperandMatchResultTy parseLWE(OperandVector &Operands);
 
+  OperandMatchResultTy parseOModOperand(OperandVector &Operands);
+
   void cvtId(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
   void cvtVOP3_2_nomod(MCInst &Inst, const OperandVector &Operands);
@@ -573,10 +608,8 @@
 
   void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
-  OperandMatchResultTy parseVOP3OptionalOps(OperandVector &Operands);
 
-  OperandMatchResultTy parseDPPCtrlOps(OperandVector &Operands);
-  OperandMatchResultTy parseDPPOptionalOps(OperandVector &Operands);
+  OperandMatchResultTy parseDPPCtrlOps(OperandVector &Operands, bool AddDefault);
   void cvtDPP_mod(MCInst &Inst, const OperandVector &Operands);
   void cvtDPP_nomod(MCInst &Inst, const OperandVector &Operands);
   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool HasMods);
@@ -1109,8 +1142,8 @@
       return true;
     if (Op.isImm() && Op.hasModifiers())
       return true;
-    if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOMod ||
-                       Op.getImmTy() == AMDGPUOperand::ImmTyClamp))
+    if (Op.isImm() && (Op.getImmTy() == AMDGPUOperand::ImmTyOModSI ||
+                       Op.getImmTy() == AMDGPUOperand::ImmTyClampSI))
       return true;
   }
   return false;
@@ -1213,12 +1246,19 @@
         }
         Operands.push_back(std::move(R));
       } else {
-        ResTy = parseVOP3OptionalOps(Operands);
         if (ResTy == MatchOperand_NoMatch) {
           const auto &Tok = Parser.getTok();
           Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(),
                                                         Tok.getLoc()));
           Parser.Lex();
+          if (getLexer().is(AsmToken::Colon)) {
+            Parser.Lex();
+            if (getLexer().is(AsmToken::Identifier)) {
+              Parser.Lex();
+            }
+          }
+        } else {
+          return ResTy;
         }
       }
       return MatchOperand_Success;
@@ -1243,6 +1283,10 @@
   // Add the instruction mnemonic
   Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
 
+
+  if (Name.endswith("_e64")) { Name = Name.substr(0, Name.size() - 4); }
+  if (Name.endswith("_e32")) { Name = Name.substr(0, Name.size() - 4); }
+
   while (!getLexer().is(AsmToken::EndOfStatement)) {
     AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
 
@@ -1268,7 +1312,7 @@
 
 AMDGPUAsmParser::OperandMatchResultTy
 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int,
-                                    int64_t Default) {
+                                    int64_t Default, bool AddDefault) {
   // We are at the end of the statement, and this is a default argument, so
   // use a default value.
   if (getLexer().is(AsmToken::EndOfStatement)) {
@@ -1279,9 +1323,14 @@
   switch(getLexer().getKind()) {
     default: return MatchOperand_NoMatch;
     case AsmToken::Identifier: {
-      StringRef OffsetName = Parser.getTok().getString();
-      if (!OffsetName.equals(Prefix))
+      StringRef Name = Parser.getTok().getString();
+      if (!Name.equals(Prefix)) {
+        if (AddDefault) {
+          Int = Default;
+          return MatchOperand_Success;
+        }
         return MatchOperand_NoMatch;
+      }
 
       Parser.Lex();
       if (getLexer().isNot(AsmToken::Colon))
@@ -1301,22 +1350,29 @@
 
 AMDGPUAsmParser::OperandMatchResultTy
 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
-                                    enum AMDGPUOperand::ImmTy ImmTy) {
+                                    enum AMDGPUOperand::ImmTy ImmTy,
+                                    int64_t Default, bool AddDefault,
+                                    bool (*ConvertResult)(int64_t&)) {
 
   SMLoc S = Parser.getTok().getLoc();
-  int64_t Offset = 0;
+  int64_t Value = 0;
 
-  AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Offset);
+  AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value, Default, AddDefault);
   if (Res != MatchOperand_Success)
     return Res;
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Offset, S, ImmTy));
+  if (ConvertResult && !ConvertResult(Value)) {
+    return MatchOperand_ParseFail;
+  }
+
+  Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy));
   return MatchOperand_Success;
 }
 
 AMDGPUAsmParser::OperandMatchResultTy
 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
-                               enum AMDGPUOperand::ImmTy ImmTy) {
+                               enum AMDGPUOperand::ImmTy ImmTy,
+                                     bool AddDefault) {
   int64_t Bit = 0;
   SMLoc S = Parser.getTok().getLoc();
 
@@ -1333,7 +1389,11 @@
           Bit = 0;
           Parser.Lex();
         } else {
-          return MatchOperand_NoMatch;
+          if (AddDefault) {
+            Bit = 0;
+          } else {
+            return MatchOperand_NoMatch;
+          }
         }
         break;
       }
@@ -1438,47 +1498,6 @@
 // ds
 //===----------------------------------------------------------------------===//
 
-static const OptionalOperand DSOptionalOps [] = {
-  {"offset",  AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
-  {"gds",     AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
-};
-
-static const OptionalOperand DSOptionalOpsOff01 [] = {
-  {"offset0", AMDGPUOperand::ImmTyDSOffset0, false, 0, nullptr},
-  {"offset1", AMDGPUOperand::ImmTyDSOffset1, false, 0, nullptr},
-  {"gds",     AMDGPUOperand::ImmTyGDS, true, 0, nullptr}
-};
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDSOptionalOps(OperandVector &Operands) {
-  return parseOptionalOps(DSOptionalOps, Operands);
-}
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDSOff01OptionalOps(OperandVector &Operands) {
-  return parseOptionalOps(DSOptionalOpsOff01, Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDSOffsetOptional(OperandVector &Operands) {
-  SMLoc S = Parser.getTok().getLoc();
-  AMDGPUAsmParser::OperandMatchResultTy Res =
-    parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
-  if (Res == MatchOperand_NoMatch) {
-    Operands.push_back(AMDGPUOperand::CreateImm(0, S,
-                       AMDGPUOperand::ImmTyOffset));
-    Res = MatchOperand_Success;
-  }
-  return Res;
-}
-
-bool AMDGPUOperand::isDSOffset() const {
-  return isImm() && isUInt<16>(getImm());
-}
-
-bool AMDGPUOperand::isDSOffset01() const {
-  return isImm() && isUInt<8>(getImm());
-}
-
 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                     const OperandVector &Operands) {
 
@@ -1497,8 +1516,8 @@
     OptionalIdx[Op.getImmTy()] = i;
   }
 
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDSOffset0);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDSOffset1);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
 
   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
@@ -1612,7 +1631,7 @@
   return MatchOperand_Success;
 }
 
-bool AMDGPUAsmParser::parseHwreg(int64_t &HwRegCode, int64_t &Offset, int64_t &Width) {
+bool AMDGPUAsmParser::parseHwregOperand(int64_t &HwRegCode, int64_t &Offset, int64_t &Width) {
   if (Parser.getTok().getString() != "hwreg")
     return true;
   Parser.Lex();
@@ -1658,7 +1677,7 @@
 }
 
 AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseHwregOp(OperandVector &Operands) {
+AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
   int64_t Imm16Val = 0;
   SMLoc S = Parser.getTok().getLoc();
 
@@ -1679,7 +1698,7 @@
         int64_t HwRegCode = 0;
         int64_t Offset = 0; // default
         int64_t Width = 32; // default
-        if (parseHwreg(HwRegCode, Offset, Width))
+        if (parseHwregOperand(HwRegCode, Offset, Width))
           return MatchOperand_ParseFail;
         // HwRegCode (6) [5:0]
         // Offset (5) [10:6]
@@ -1737,27 +1756,6 @@
 // flat
 //===----------------------------------------------------------------------===//
 
-static const OptionalOperand FlatOptionalOps [] = {
-  {"glc",    AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
-  {"slc",    AMDGPUOperand::ImmTySLC, true, 0, nullptr},
-  {"tfe",    AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
-};
-
-static const OptionalOperand FlatAtomicOptionalOps [] = {
-  {"slc",    AMDGPUOperand::ImmTySLC, true, 0, nullptr},
-  {"tfe",    AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
-};
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseFlatOptionalOps(OperandVector &Operands) {
-  return parseOptionalOps(FlatOptionalOps, Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseFlatAtomicOptionalOps(OperandVector &Operands) {
-  return parseOptionalOps(FlatAtomicOptionalOps, Operands);
-}
-
 void AMDGPUAsmParser::cvtFlat(MCInst &Inst,
                                const OperandVector &Operands) {
   OptionalImmIndexMap OptionalIdx;
@@ -1808,38 +1806,6 @@
 // mubuf
 //===----------------------------------------------------------------------===//
 
-static const OptionalOperand MubufOptionalOps [] = {
-  {"offset", AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
-  {"glc",    AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
-  {"slc",    AMDGPUOperand::ImmTySLC, true, 0, nullptr},
-  {"tfe",    AMDGPUOperand::ImmTyTFE, true, 0, nullptr}
-};
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseMubufOptionalOps(OperandVector &Operands) {
-  return parseOptionalOps(MubufOptionalOps, Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseOffset(OperandVector &Operands) {
-  return parseIntWithPrefix("offset", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseGLC(OperandVector &Operands) {
-  return parseNamedBit("glc", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseSLC(OperandVector &Operands) {
-  return parseNamedBit("slc", Operands);
-}
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseTFE(OperandVector &Operands) {
-  return parseNamedBit("tfe", Operands);
-}
-
 bool AMDGPUOperand::isMubufOffset() const {
   return isImmTy(ImmTyOffset) && isUInt<12>(getImm());
 }
@@ -1952,64 +1918,101 @@
   return false;
 }
 
-static const OptionalOperand VOP3OptionalOps [] = {
-  {"clamp", AMDGPUOperand::ImmTyClamp, true, 0, nullptr},
-  {"mul",   AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodMul},
-  {"div",   AMDGPUOperand::ImmTyOMod, false, 1, ConvertOmodDiv},
-};
-
-static bool isVOP3(OperandVector &Operands) {
-  if (operandsHaveModifiers(Operands))
+static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
+  if (BoundCtrl == 0) {
+    BoundCtrl = 1;
+    return true;
+  } else if (BoundCtrl == -1) {
+    BoundCtrl = 0;
     return true;
-
-  if (Operands.size() >= 2) {
-    AMDGPUOperand &DstOp = ((AMDGPUOperand&)*Operands[1]);
-
-    if (DstOp.isRegClass(AMDGPU::SGPR_64RegClassID))
-      return true;
   }
+  return false;
+}
 
-  if (Operands.size() >= 5)
-    return true;
+// Note: the order in this table matches the order of operands in AsmString.
+static const OptionalOperand AMDGPUOperandTable[] = {
+  {"offen",   AMDGPUOperand::ImmTyOffen, true, 0, nullptr},
+  {"offset0", AMDGPUOperand::ImmTyOffset0, false, 0, nullptr},
+  {"offset1", AMDGPUOperand::ImmTyOffset1, false, 0, nullptr},
+  {"gds",     AMDGPUOperand::ImmTyGDS, true, 0, nullptr},
+  {"offset",  AMDGPUOperand::ImmTyOffset, false, 0, nullptr},
+  {"glc",     AMDGPUOperand::ImmTyGLC, true, 0, nullptr},
+  {"slc",     AMDGPUOperand::ImmTySLC, true, 0, nullptr},
+  {"tfe",     AMDGPUOperand::ImmTyTFE, true, 0, nullptr},
+  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, 0, nullptr},
+  {"omod",    AMDGPUOperand::ImmTyOModSI, false, 1, ConvertOmodMul},
+  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, 0, nullptr},
+  {"da",      AMDGPUOperand::ImmTyDA,    true, 0, nullptr},
+  {"r128",    AMDGPUOperand::ImmTyR128,  true, 0, nullptr},
+  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, 0, nullptr},
+  {"dmask",   AMDGPUOperand::ImmTyDMask, false, 0, nullptr},
+  {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, -1, nullptr},
+  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, 0xf, nullptr},
+  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, 0xf, nullptr},
+  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, -1, ConvertBoundCtrl},
+};
 
-  if (Operands.size() > 3) {
-    AMDGPUOperand &Src1Op = ((AMDGPUOperand&)*Operands[3]);
-    if (Src1Op.isRegClass(AMDGPU::SReg_32RegClassID) ||
-        Src1Op.isRegClass(AMDGPU::SReg_64RegClassID))
-      return true;
+AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands, const OptionalOperand& Op, bool AddDefault)
+{
+  if (Op.IsBit) {
+    return parseNamedBit(Op.Name, Operands, Op.Type, AddDefault);
+  } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
+    return parseDPPCtrlOps(Operands, AddDefault);
+  } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
+    return parseOModOperand(Operands);
+  } else {
+    return parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.Default, AddDefault, Op.ConvertResult);
   }
-  return false;
 }
 
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) {
-
-  // The value returned by this function may change after parsing
-  // an operand so store the original value here.
-  bool HasModifiers = operandsHaveModifiers(Operands);
-
-  bool IsVOP3 = isVOP3(Operands);
-  if (HasModifiers || IsVOP3 ||
-      getLexer().isNot(AsmToken::EndOfStatement) ||
-      getForcedEncodingSize() == 64) {
-
-    AMDGPUAsmParser::OperandMatchResultTy Res =
-        parseOptionalOps(VOP3OptionalOps, Operands);
-
-    if (!HasModifiers && Res == MatchOperand_Success) {
-      // We have added a modifier operation, so we need to make sure all
-      // previous register operands have modifiers
-      for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
-        AMDGPUOperand &Op = ((AMDGPUOperand&)*Operands[i]);
-        if ((Op.isReg() || Op.isImm()) && !Op.hasModifiers())
-          Op.setModifiers(0);
-      }
+AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseAMDGPUOperand(OperandVector &Operands, StringRef Name)
+{
+  StringRef Tok;
+  if (getLexer().isNot(AsmToken::EndOfStatement) && getLexer().getKind() == AsmToken::Identifier) {
+    Tok = Parser.getTok().getString();
+  }
+  bool optional = false;
+  if (Tok == "mul" || Tok == "div") { optional = true; }
+  for (const OptionalOperand &Op1 : AMDGPUOperandTable) {
+    if (Op1.Name == Tok) { optional = true; break; }
+  }
+  // Attempt to parse current optional operand.
+  for (const OptionalOperand &Op : AMDGPUOperandTable) {
+    bool parseThis =
+      Name == "" ||
+      (Op.Name == Name) ||
+      (Name == "omod" && Op.Type == AMDGPUOperand::ImmTyOModSI);
+    if (parseThis && Tok == Name) {
+      // Exactly the expected token for optional operand.
+      // Parse it and add operand normally.
+      return parseOptionalOperand(Operands, Op, true);
+    } else if (parseThis) {
+      // Token for optional operand which is later in the table
+      // than the one we expect. If needed, add default value
+      // for the operand we expect, do not consume anything
+      // and return MatchOperand_NoMatch. Parsing will continue.
+      return parseOptionalOperand(Operands, Op, optional);
+    } else if (Op.Name == Tok) {
+      // This looks like an optional operand, but we do not expect it.
+      // This is the case when the AsmString has a token in it.
+      return MatchOperand_NoMatch;
     }
-    return Res;
   }
   return MatchOperand_NoMatch;
 }
 
+AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
+{
+  StringRef Name = Parser.getTok().getString();
+  if (Name == "mul") {
+    return parseIntWithPrefix("mul", Operands, AMDGPUOperand::ImmTyOModSI, 0, false, ConvertOmodMul);
+  } else if (Name == "div") {
+    return parseIntWithPrefix("div", Operands, AMDGPUOperand::ImmTyOModSI, 0, false, ConvertOmodDiv);
+  } else {
+    return MatchOperand_NoMatch;
+  }
+}
+
 void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) {
   unsigned I = 1;
   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
@@ -2060,8 +2063,8 @@
     }
   }
 
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOMod);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
 }
 
 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) {
@@ -2090,11 +2093,11 @@
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
-  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
+  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
 }
 
 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
@@ -2158,7 +2161,7 @@
 }
 
 AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDPPCtrlOps(OperandVector &Operands) {
+AMDGPUAsmParser::parseDPPCtrlOps(OperandVector &Operands, bool AddDefault) {
   SMLoc S = Parser.getTok().getLoc();
   StringRef Prefix;
   int64_t Int;
@@ -2184,7 +2187,12 @@
         && Prefix != "wave_shr"
         && Prefix != "wave_ror"
         && Prefix != "row_bcast") {
-      return MatchOperand_NoMatch;
+      if (AddDefault) {
+        Operands.push_back(AMDGPUOperand::CreateImm(0, S, AMDGPUOperand::ImmTyDppCtrl));
+        return MatchOperand_Success;
+      } else {
+        return MatchOperand_NoMatch;
+      }
     }
 
     Parser.Lex();
@@ -2269,30 +2277,6 @@
   return MatchOperand_Success;
 }
 
-static const OptionalOperand DPPOptionalOps [] = {
-  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, 0xf, nullptr},
-  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, 0xf, nullptr},
-  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, -1, nullptr}
-};
-
-AMDGPUAsmParser::OperandMatchResultTy
-AMDGPUAsmParser::parseDPPOptionalOps(OperandVector &Operands) {
-  SMLoc S = Parser.getTok().getLoc();
-  OperandMatchResultTy Res = parseOptionalOps(DPPOptionalOps, Operands);
-  // XXX - sp3 use syntax "bound_ctrl:0" to indicate that bound_ctrl bit was set
-  if (Res == MatchOperand_Success) {
-    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
-    // If last operand was parsed as bound_ctrl we should replace it with correct value (1)
-    if (Op.isImmTy(AMDGPUOperand::ImmTyDppBoundCtrl)) {
-      Operands.pop_back();
-      Operands.push_back(
-        AMDGPUOperand::CreateImm(1, S, AMDGPUOperand::ImmTyDppBoundCtrl));
-        return MatchOperand_Success;
-    }
-  }
-  return Res;
-}
-
 void AMDGPUAsmParser::cvtDPP_mod(MCInst &Inst, const OperandVector &Operands) {
   cvtDPP(Inst, Operands, true);
 }
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -45,9 +45,11 @@
   void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printDSOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printDSOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printDSOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSMRDOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -63,10 +65,10 @@
   void printImmediate64(uint64_t I, raw_ostream &O);
   void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printDPPCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printRowMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printBankMaskOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-  void printBoundCtrlOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printDPPCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printRowMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBankMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+  void printBoundCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
   void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -94,7 +94,7 @@
   }
 }
 
-void AMDGPUInstPrinter::printDSOffset(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
                                       raw_ostream &O) {
   uint16_t Imm = MI->getOperand(OpNo).getImm();
   if (Imm != 0) {
@@ -103,7 +103,7 @@
   }
 }
 
-void AMDGPUInstPrinter::printDSOffset0(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
                                         raw_ostream &O) {
   if (MI->getOperand(OpNo).getImm()) {
     O << " offset0:";
@@ -111,7 +111,7 @@
   }
 }
 
-void AMDGPUInstPrinter::printDSOffset1(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
                                         raw_ostream &O) {
   if (MI->getOperand(OpNo).getImm()) {
     O << " offset1:";
@@ -119,6 +119,16 @@
   }
 }
 
+void AMDGPUInstPrinter::printSMRDOffset(const MCInst *MI, unsigned OpNo,
+                                        raw_ostream &O) {
+  printU32ImmOperand(MI, OpNo, O); // Same printer smrd_offset used before.
+}
+
+void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
+                                               raw_ostream &O) {
+  printU32ImmOperand(MI, OpNo, O); // Old smrd_literal_offset printer.
+}
+
 void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
                                  raw_ostream &O) {
   printNamedBit(MI, OpNo, O, "gds");
@@ -422,7 +432,7 @@
 }
 
 
-void AMDGPUInstPrinter::printDPPCtrlOperand(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
                                              raw_ostream &O) {
   unsigned Imm = MI->getOperand(OpNo).getImm();
   if (Imm <= 0x0ff) {
@@ -461,19 +471,19 @@
   }
 }
 
-void AMDGPUInstPrinter::printRowMaskOperand(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
                                             raw_ostream &O) {
   O << " row_mask:";
   printU4ImmOperand(MI, OpNo, O);
 }
 
-void AMDGPUInstPrinter::printBankMaskOperand(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
                                              raw_ostream &O) {
   O << " bank_mask:";
   printU4ImmOperand(MI, OpNo, O);
 }
 
-void AMDGPUInstPrinter::printBoundCtrlOperand(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
                                               raw_ostream &O) {
   unsigned Imm = MI->getOperand(OpNo).getImm();
   if (Imm) {
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -461,322 +461,75 @@
 include "SIInstrFormats.td"
 include "VIInstrFormats.td"
 
-def MubufOffsetMatchClass : AsmOperandClass {
-  let Name = "MubufOffset";
-  let ParserMethod = "parseMubufOptionalOps";
+class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
+  let Name = "Imm"#CName;
+  let PredicateMethod = "is"#CName;
+  let ParserMethod = "parse"#CName;
   let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
+  let IsOptional = Optional;
 }
 
-class DSOffsetBaseMatchClass <string parser> : AsmOperandClass {
-  let Name = "DSOffset"#parser;
-  let ParserMethod = parser;
-  let RenderMethod = "addImmOperands";
-  let PredicateMethod = "isDSOffset";
-  let IsOptional = 1;
-}
-
-def DSOffsetMatchClass : DSOffsetBaseMatchClass <"parseDSOptionalOps">;
-def DSOffsetGDSMatchClass : DSOffsetBaseMatchClass <"parseDSOffsetOptional">;
-
-def DSOffset01MatchClass : AsmOperandClass {
-  let Name = "DSOffset1";
-  let ParserMethod = "parseDSOff01OptionalOps";
-  let RenderMethod = "addImmOperands";
-  let PredicateMethod = "isDSOffset01";
-  let IsOptional = 1;
-}
-
-class GDSBaseMatchClass <string parser> : AsmOperandClass {
-  let Name = "GDS"#parser;
-  let PredicateMethod = "isGDS";
-  let ParserMethod = parser;
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-def GDSMatchClass : GDSBaseMatchClass <"parseDSOptionalOps">;
-def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">;
-
-class GLCBaseMatchClass <string parser> : AsmOperandClass {
-  let Name = "GLC"#parser;
-  let PredicateMethod = "isGLC";
-  let ParserMethod = parser;
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-def GLCMubufMatchClass : GLCBaseMatchClass <"parseMubufOptionalOps">;
-def GLCFlatMatchClass : GLCBaseMatchClass <"parseFlatOptionalOps">;
-
-class SLCBaseMatchClass <string parser> : AsmOperandClass {
-  let Name = "SLC"#parser;
-  let PredicateMethod = "isSLC";
-  let ParserMethod = parser;
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-def SLCMubufMatchClass : SLCBaseMatchClass <"parseMubufOptionalOps">;
-def SLCFlatMatchClass : SLCBaseMatchClass <"parseFlatOptionalOps">;
-def SLCFlatAtomicMatchClass : SLCBaseMatchClass <"parseFlatAtomicOptionalOps">;
-
-class TFEBaseMatchClass <string parser> : AsmOperandClass {
-  let Name = "TFE"#parser;
-  let PredicateMethod = "isTFE";
-  let ParserMethod = parser;
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-def TFEMubufMatchClass : TFEBaseMatchClass <"parseMubufOptionalOps">;
-def TFEFlatMatchClass : TFEBaseMatchClass <"parseFlatOptionalOps">;
-def TFEFlatAtomicMatchClass : TFEBaseMatchClass <"parseFlatAtomicOptionalOps">;
-
-def OModMatchClass : AsmOperandClass {
-  let Name = "OMod";
-  let PredicateMethod = "isOMod";
-  let ParserMethod = "parseVOP3OptionalOps";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
+def sdwa_sel : NamedMatchClass<"SDWASel">;
 
-def ClampMatchClass : AsmOperandClass {
-  let Name = "Clamp";
-  let PredicateMethod = "isClamp";
-  let ParserMethod = "parseVOP3OptionalOps";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
+class NamedOperandBit<string Name, AsmOperandClass MatchClass> : Operand<i1> {
+  let PrintMethod = "print"#Name;
+  let ParserMatchClass = MatchClass;
 }
-
-class SMRDOffsetBaseMatchClass <string predicate> : AsmOperandClass {
-  let Name = "SMRDOffset"#predicate;
-  let PredicateMethod = predicate;
-  let RenderMethod = "addImmOperands";
+
+class NamedOperandU8<string Name, AsmOperandClass MatchClass> : Operand<i8> {
+  let PrintMethod = "print"#Name;
+  let ParserMatchClass = MatchClass;
 }
 
-def SMRDOffsetMatchClass : SMRDOffsetBaseMatchClass <"isSMRDOffset">;
-def SMRDLiteralOffsetMatchClass : SMRDOffsetBaseMatchClass <
-  "isSMRDLiteralOffset"
->;
-
-def DPPCtrlMatchClass : AsmOperandClass {
-  let Name = "DPPCtrl";
-  let PredicateMethod = "isDPPCtrl";
-  let ParserMethod = "parseDPPCtrlOps";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 0;
+class NamedOperandU16<string Name, AsmOperandClass MatchClass> : Operand<i16> {
+  let PrintMethod = "print"#Name;
+  let ParserMatchClass = MatchClass;
 }
 
-class DPPOptionalMatchClass <string OpName>: AsmOperandClass {
-  let Name = "DPPOptional"#OpName;
-  let PredicateMethod = "is"#OpName;
-  let ParserMethod = "parseDPPOptionalOps";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-def SDWASelMatchClass : AsmOperandClass {
-  let Name = "SDWASel";
-  let PredicateMethod = "isSDWASel";
-  let ParserMethod = "parseSDWASel";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-def SDWADstUnusedMatchClass : AsmOperandClass {
-  let Name = "SDWADstUnused";
-  let PredicateMethod = "isSDWADstUnused";
-  let ParserMethod = "parseSDWADstUnused";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-class OptionalImmAsmOperand <string OpName> : AsmOperandClass {
-  let Name = "Imm"#OpName;
-  let PredicateMethod = "isImm";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-def DMaskMatchClass : AsmOperandClass {
-  let Name = "DMask";
-  let PredicateMethod = "isDMask";
-  let ParserMethod = "parseDMask";
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-class NamedBitMatchClass<string BitName> : AsmOperandClass {
-  let Name = "Imm"#BitName;
-  let PredicateMethod = "is"#BitName;
-  let ParserMethod = "parse"#BitName;
-  let RenderMethod = "addImmOperands";
-  let IsOptional = 1;
-}
-
-class NamedBitOperand<string BitName> : Operand<i1> {
-  let PrintMethod = "print"#BitName;
-}
-
-def HwregMatchClass : AsmOperandClass {
-  let Name = "Hwreg";
-  let PredicateMethod = "isHwreg";
-  let ParserMethod = "parseHwregOp";
-  let RenderMethod = "addImmOperands";
+class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
+  let PrintMethod = "print"#Name;
+  let ParserMatchClass = MatchClass;
 }
 
 let OperandType = "OPERAND_IMMEDIATE" in {
 
-def offen : Operand<i1> {
-  let PrintMethod = "printOffen";
-  let ParserMatchClass = OptionalImmAsmOperand<"offen">;
-}
-def idxen : Operand<i1> {
-  let PrintMethod = "printIdxen";
-  let ParserMatchClass = OptionalImmAsmOperand<"idxen">;
-}
-def addr64 : Operand<i1> {
-  let PrintMethod = "printAddr64";
-}
-def mbuf_offset : Operand<i16> {
-  let PrintMethod = "printMBUFOffset";
-  let ParserMatchClass = MubufOffsetMatchClass;
-}
-class ds_offset_base <AsmOperandClass mc> : Operand<i16> {
-  let PrintMethod = "printDSOffset";
-  let ParserMatchClass = mc;
-}
-def ds_offset : ds_offset_base <DSOffsetMatchClass>;
-def ds_offset_gds : ds_offset_base <DSOffsetGDSMatchClass>;
+def offen : NamedOperandBit<"Offen", NamedMatchClass<"Offen">>;
+def idxen : NamedOperandBit<"Idxen", NamedMatchClass<"Idxen">>;
+def addr64 : NamedOperandBit<"Addr64", NamedMatchClass<"Addr64">>;
 
-def ds_offset0 : Operand<i8> {
-  let PrintMethod = "printDSOffset0";
-  let ParserMatchClass = DSOffset01MatchClass;
-}
-def ds_offset1 : Operand<i8> {
-  let PrintMethod = "printDSOffset1";
-  let ParserMatchClass = DSOffset01MatchClass;
-}
-class gds_base <AsmOperandClass mc> : Operand <i1> {
-  let PrintMethod = "printGDS";
-  let ParserMatchClass = mc;
-}
-def gds : gds_base <GDSMatchClass>;
+def offset : NamedOperandU16<"Offset", NamedMatchClass<"Offset">>;
+def offset0 : NamedOperandU8<"Offset0", NamedMatchClass<"Offset0">>;
+def offset1 : NamedOperandU8<"Offset1", NamedMatchClass<"Offset1">>;
 
-def gds01 : gds_base <GDS01MatchClass>;
+def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;
 
-class glc_base <AsmOperandClass mc> : Operand <i1> {
-  let PrintMethod = "printGLC";
-  let ParserMatchClass = mc;
-}
+def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
+def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
 
-def glc : glc_base <GLCMubufMatchClass>;
-def glc_flat : glc_base <GLCFlatMatchClass>;
+def smrd_offset : NamedOperandU32<"SMRDOffset", NamedMatchClass<"SMRDOffset">>;
+def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset", NamedMatchClass<"SMRDLiteralOffset">>;
 
-class slc_base <AsmOperandClass mc> : Operand <i1> {
-  let PrintMethod = "printSLC";
-  let ParserMatchClass = mc;
-}
+def glc : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
+def slc : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
+def tfe : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
+def unorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
+def da : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
+def r128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
+def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
 
-def slc : slc_base <SLCMubufMatchClass>;
-def slc_flat : slc_base <SLCFlatMatchClass>;
-def slc_flat_atomic : slc_base <SLCFlatAtomicMatchClass>;
+def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
 
-class tfe_base <AsmOperandClass mc> : Operand <i1> {
-  let PrintMethod = "printTFE";
-  let ParserMatchClass = mc;
-}
+def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
+def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
+def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
+def bound_ctrl : NamedOperandBit<"BoundCtrl", NamedMatchClass<"BoundCtrl">>;
 
-def tfe : tfe_base <TFEMubufMatchClass>;
-def tfe_flat : tfe_base <TFEFlatMatchClass>;
-def tfe_flat_atomic : tfe_base <TFEFlatAtomicMatchClass>;
+def dst_sel : NamedOperandU32<"SDWADstSel", sdwa_sel>;
+def src0_sel : NamedOperandU32<"SDWASrc0Sel", sdwa_sel>;
+def src1_sel : NamedOperandU32<"SDWASrc1Sel", sdwa_sel>;
+def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
 
-def omod : Operand <i32> {
-  let PrintMethod = "printOModSI";
-  let ParserMatchClass = OModMatchClass;
-}
-
-def ClampMod : Operand <i1> {
-  let PrintMethod = "printClampSI";
-  let ParserMatchClass = ClampMatchClass;
-}
-
-def smrd_offset : Operand <i32> {
-  let PrintMethod = "printU32ImmOperand";
-  let ParserMatchClass = SMRDOffsetMatchClass;
-}
-
-def smrd_literal_offset : Operand <i32> {
-  let PrintMethod = "printU32ImmOperand";
-  let ParserMatchClass = SMRDLiteralOffsetMatchClass;
-}
-
-def dmask : Operand <i32> {
-  let PrintMethod = "printDMask";
-  let ParserMatchClass = DMaskMatchClass;
-}
-
-def unorm : NamedBitOperand<"UNorm"> {
-  let ParserMatchClass = NamedBitMatchClass<"UNorm">;
-}
-
-def da : NamedBitOperand<"DA"> {
-  let ParserMatchClass = NamedBitMatchClass<"DA">;
-}
-
-def r128 : NamedBitOperand<"R128"> {
-  let ParserMatchClass = NamedBitMatchClass<"R128">;
-}
-
-def lwe : NamedBitOperand<"LWE"> {
-  let ParserMatchClass = NamedBitMatchClass<"LWE">;
-}
-
-def dpp_ctrl : Operand <i32> {
-  let PrintMethod = "printDPPCtrlOperand";
-  let ParserMatchClass = DPPCtrlMatchClass;
-}
-
-def row_mask : Operand <i32> {
-  let PrintMethod = "printRowMaskOperand";
-  let ParserMatchClass = DPPOptionalMatchClass<"RowMask">;
-}
-
-def bank_mask : Operand <i32> {
-  let PrintMethod = "printBankMaskOperand";
-  let ParserMatchClass = DPPOptionalMatchClass<"BankMask">;
-}
-
-def bound_ctrl : Operand <i1> {
-  let PrintMethod = "printBoundCtrlOperand";
-  let ParserMatchClass = DPPOptionalMatchClass<"BoundCtrl">;
-}
-
-def dst_sel : Operand <i32> {
-  let PrintMethod = "printSDWADstSel";
-  let ParserMatchClass = SDWASelMatchClass;
-}
-
-def src0_sel : Operand <i32> {
-  let PrintMethod = "printSDWASrc0Sel";
-  let ParserMatchClass = SDWASelMatchClass;
-}
-
-def src1_sel : Operand <i32> {
-  let PrintMethod = "printSDWASrc1Sel";
-  let ParserMatchClass = SDWASelMatchClass;
-}
-
-def hwreg : Operand <i16> {
-  let PrintMethod = "printHwreg";
-  let ParserMatchClass = HwregMatchClass;
-}
-
-def dst_unused : Operand <i32> {
-  let PrintMethod = "printSDWADstUnused";
-  let ParserMatchClass = SDWADstUnusedMatchClass;
-}
+def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; // required
 
 } // End OperandType = "OPERAND_IMMEDIATE"
 
@@ -1401,7 +1154,7 @@
       !if (!eq(HasModifiers, 1),
         // VOP1 with modifiers
         (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
-             ClampMod:$clamp, omod:$omod)
+             clampmod:$clamp, omod:$omod)
       /* else */,
         // VOP1 without modifiers
         (ins Src0RC:$src0)
@@ -1411,7 +1164,7 @@
         // VOP 2 with modifiers
         (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
              InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
-             ClampMod:$clamp, omod:$omod)
+             clampmod:$clamp, omod:$omod)
       /* else */,
         // VOP2 without modifiers
         (ins Src0RC:$src0, Src1RC:$src1)
@@ -1422,7 +1175,7 @@
         (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
              InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
              InputModsNoDefault:$src2_modifiers, Src2RC:$src2,
-             ClampMod:$clamp, omod:$omod)
+             clampmod:$clamp, omod:$omod)
       /* else */,
         // VOP3 without modifiers
         (ins Src0RC:$src0, Src1RC:$src1, Src2RC:$src2)
@@ -1472,7 +1225,7 @@
               !if (!eq(HasModifiers, 1),
                 // VOP1_SDWA with modifiers
                 (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
-                     ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel)
               /* else */,
                 // VOP1_SDWA without modifiers
@@ -1484,7 +1237,7 @@
                 // VOP2_SDWA with modifiers
                 (ins InputModsNoDefault:$src0_modifiers, Src0RC:$src0,
                      InputModsNoDefault:$src1_modifiers, Src1RC:$src1,
-                     ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel, src1_sel:$src1_sel)
               /* else */,
                 // VOP2_DPP without modifiers
@@ -1545,7 +1298,7 @@
   string args = !if(!eq(HasModifiers, 0),
                      getAsm32<0, NumSrcArgs, DstVT>.ret,
                      ", "#src0#src1);
-  string ret = dst#args#" $dpp_ctrl $row_mask $bank_mask $bound_ctrl";
+  string ret = dst#args#" $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
 }
 
 class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@@ -1769,7 +1522,7 @@
   let InsSDWA = (ins InputModsNoDefault:$src0_modifiers, Src0RC32:$src0,
                      InputModsNoDefault:$src1_modifiers, Src1RC32:$src1,
                      VGPR_32:$src2, // stub argument
-                     ClampMod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                     clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);
   let Asm32 = getAsm32<1, 2, f32>.ret;
   let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
@@ -2548,7 +2301,7 @@
   (ins InputModsNoDefault:$src0_modifiers, P.Src0RC64:$src0,
        InputModsNoDefault:$src1_modifiers, P.Src1RC64:$src1,
        InputModsNoDefault:$src2_modifiers, P.Src2RC64:$src2,
-       ClampMod:$clamp,
+       clampmod:$clamp,
        omod:$omod),
   "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
   [(set P.DstVT:$vdst,
@@ -2668,7 +2421,7 @@
 
 multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
   dag outs = (outs rc:$vdst),
-  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+  dag ins = (ins VGPR_32:$addr, offset:$offset, gds:$gds),
   string asm = opName#" $vdst, $addr"#"$offset$gds"> {
 
   def "" : DS_Pseudo <opName, outs, ins, []>;
@@ -2681,8 +2434,8 @@
 
 multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
   dag outs = (outs rc:$vdst),
-  dag ins = (ins VGPR_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
-                 gds01:$gds),
+  dag ins = (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1,
+                 gds:$gds),
   string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
 
   def "" : DS_Pseudo <opName, outs, ins, []>;
@@ -2695,7 +2448,7 @@
 
 multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
   dag outs = (outs),
-  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+  dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
   string asm = opName#" $addr, $data0"#"$offset$gds"> {
 
   def "" : DS_Pseudo <opName, outs, ins, []>,
@@ -2710,8 +2463,8 @@
 multiclass DS_1A1D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
   dag outs = (outs),
   dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
-              ds_offset0:$offset0, ds_offset1:$offset1, gds01:$gds),
-  string asm = opName#" $addr, $data0, $data1"#"$offset0"#"$offset1"#"$gds"> {
+              offset0:$offset0, offset1:$offset1, gds:$gds),
+  string asm = opName#" $addr, $data0, $data1$offset0$offset1$gds"> {
 
   def "" : DS_Pseudo <opName, outs, ins, []>;
 
@@ -2724,7 +2477,7 @@
 multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
                         string noRetOp = "",
   dag outs = (outs rc:$vdst),
-  dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds),
+  dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
   string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
 
   let hasPostISelHook = 1 in {
@@ -2773,14 +2526,14 @@
                         string noRetOp = "", RegisterClass src = rc> :
   DS_1A2D_RET_m <op, asm, rc, noRetOp,
                  (ins VGPR_32:$addr, src:$data0, src:$data1,
-                      ds_offset:$offset, gds:$gds)
+                      offset:$offset, gds:$gds)
 >;
 
 multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
                           string noRetOp = opName,
   dag outs = (outs),
   dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
-                 ds_offset:$offset, gds:$gds),
+                 offset:$offset, gds:$gds),
   string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
 
   def "" : DS_Pseudo <opName, outs, ins, []>,
@@ -2794,7 +2547,7 @@
 
 multiclass DS_0A_RET <bits<8> op, string opName,
   dag outs = (outs VGPR_32:$vdst),
-  dag ins = (ins ds_offset:$offset, gds:$gds),
+  dag ins = (ins offset:$offset, gds:$gds),
   string asm = opName#" $vdst"#"$offset"#"$gds"> {
 
   let mayLoad = 1, mayStore = 1 in {
@@ -2809,7 +2562,7 @@
 
 multiclass DS_1A_RET_GDS <bits<8> op, string opName,
   dag outs = (outs VGPR_32:$vdst),
-  dag ins = (ins VGPR_32:$addr, ds_offset_gds:$offset),
+  dag ins = (ins VGPR_32:$addr, offset:$offset),
   string asm = opName#" $vdst, $addr"#"$offset gds"> {
 
   def "" : DS_Pseudo <opName, outs, ins, []>;
@@ -2835,7 +2588,7 @@
 
 multiclass DS_1A <bits<8> op, string opName,
   dag outs = (outs),
-  dag ins = (ins VGPR_32:$addr, ds_offset:$offset, gds:$gds),
+  dag ins = (ins VGPR_32:$addr, offset:$offset, gds:$gds),
   string asm = opName#" $addr"#"$offset"#"$gds"> {
 
   let mayLoad = 1, mayStore = 1 in {
@@ -3065,23 +2818,23 @@
       defm _ADDR64 : MUBUFAtomicAddr64_m <
         op, name#"_addr64", (outs),
         (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
-             SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
-        name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#"$slc", [], 0
+             SCSrc_32:$soffset, offset:$offset, slc:$slc),
+        name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$slc", [], 0
       >;
 
       defm _OFFSET : MUBUFAtomicOffset_m <
         op, name#"_offset", (outs),
-        (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, mbuf_offset:$offset,
+        (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset,
              slc:$slc),
-        name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", [], 0
+        name#" $vdata, off, $srsrc, $soffset$offset$slc", [], 0
       >;
 
       let offen = 1, idxen = 0 in {
         defm _OFFEN : MUBUFAtomicOther_m <
           op, name#"_offen", (outs),
           (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                mbuf_offset:$offset, slc:$slc),
-          name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$slc", [], 0
+                offset:$offset, slc:$slc),
+          name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$slc", [], 0
         >;
       }
 
@@ -3089,8 +2842,8 @@
         defm _IDXEN : MUBUFAtomicOther_m <
           op, name#"_idxen", (outs),
           (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                mbuf_offset:$offset, slc:$slc),
-          name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$slc", [], 0
+                offset:$offset, slc:$slc),
+          name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$slc", [], 0
         >;
       }
 
@@ -3098,8 +2851,8 @@
         defm _BOTHEN : MUBUFAtomicOther_m <
           op, name#"_bothen", (outs),
           (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                mbuf_offset:$offset, slc:$slc),
-          name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$slc",
+                offset:$offset, slc:$slc),
+          name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$slc",
           [], 0
         >;
       }
@@ -3112,7 +2865,7 @@
       defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
         op, name#"_rtn_addr64", (outs rc:$vdata),
         (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
-             SCSrc_32:$soffset, mbuf_offset:$offset, slc:$slc),
+             SCSrc_32:$soffset, offset:$offset, slc:$slc),
         name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#" glc"#"$slc",
         [(set vt:$vdata,
          (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
@@ -3122,8 +2875,8 @@
       defm _RTN_OFFSET : MUBUFAtomicOffset_m <
         op, name#"_rtn_offset", (outs rc:$vdata),
         (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
-             mbuf_offset:$offset, slc:$slc),
-        name#" $vdata, $srsrc, $soffset"#"$offset"#" glc$slc",
+             offset:$offset, slc:$slc),
+        name#" $vdata, off, $srsrc, $soffset$offset glc$slc",
         [(set vt:$vdata,
          (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
                                     i1:$slc), vt:$vdata_in))], 1
@@ -3133,7 +2886,7 @@
         defm _RTN_OFFEN : MUBUFAtomicOther_m <
           op, name#"_rtn_offen", (outs rc:$vdata),
           (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                mbuf_offset:$offset, slc:$slc),
+                offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#" glc"#"$slc",
           [], 1
         >;
@@ -3143,7 +2896,7 @@
         defm _RTN_IDXEN : MUBUFAtomicOther_m <
           op, name#"_rtn_idxen", (outs rc:$vdata),
           (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                mbuf_offset:$offset, slc:$slc),
+                offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#" glc"#"$slc",
           [], 1
         >;
@@ -3153,7 +2906,7 @@
         defm _RTN_BOTHEN : MUBUFAtomicOther_m <
           op, name#"_rtn_bothen", (outs rc:$vdata),
           (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                mbuf_offset:$offset, slc:$slc),
+                offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#" glc"#"$slc",
           [], 1
         >;
@@ -3173,8 +2926,8 @@
     let offen = 0, idxen = 0, vaddr = 0 in {
       defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
                            (ins SReg_128:$srsrc, SCSrc_32:$soffset,
-                           mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
-                           name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+                           offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+                           name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe",
                            [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
                                                      i32:$soffset, i16:$offset,
                                                      i1:$glc, i1:$slc, i1:$tfe)))]>;
@@ -3183,33 +2936,32 @@
     let offen = 1, idxen = 0  in {
       defm _OFFEN  : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
                            (ins VGPR_32:$vaddr, SReg_128:$srsrc,
-                           SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
+                           SCSrc_32:$soffset, offset:$offset, glc:$glc, slc:$slc,
                            tfe:$tfe),
-                           name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+                           name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$glc$slc$tfe", []>;
     }
 
     let offen = 0, idxen = 1 in {
       defm _IDXEN  : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
                            (ins VGPR_32:$vaddr, SReg_128:$srsrc,
-                           SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+                           SCSrc_32:$soffset, offset:$offset, glc:$glc,
                            slc:$slc, tfe:$tfe),
-                           name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+                           name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>;
     }
 
     let offen = 1, idxen = 1 in {
       defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
                            (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                           mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
-                           name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+                           offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+                           name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>;
     }
 
     let offen = 0, idxen = 0 in {
       defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
                            (ins VReg_64:$vaddr, SReg_128:$srsrc,
-                                SCSrc_32:$soffset, mbuf_offset:$offset,
+                                SCSrc_32:$soffset, offset:$offset,
 				glc:$glc, slc:$slc, tfe:$tfe),
-                           name#" $vdata, $vaddr, $srsrc, $soffset addr64"#"$offset"#
-                                "$glc"#"$slc"#"$tfe",
+                           name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$glc$slc$tfe",
                            [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
                                                   i64:$vaddr, i32:$soffset,
                                                   i16:$offset, i1:$glc, i1:$slc,
@@ -3221,18 +2973,11 @@
 multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
                           ValueType store_vt = i32, SDPatternOperator st = null_frag> {
   let mayLoad = 0, mayStore = 1 in {
-    defm : MUBUF_m <op, name, (outs),
-                    (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                    mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
-                    tfe:$tfe),
-                    name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#
-                         "$glc"#"$slc"#"$tfe", []>;
-
     let offen = 0, idxen = 0, vaddr = 0 in {
       defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
                               (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
-                              mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
-                              name#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe",
+                              offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+                              name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe",
                               [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
                                    i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
     } // offen = 0, idxen = 0, vaddr = 0
@@ -3240,35 +2985,35 @@
     let offen = 1, idxen = 0  in {
       defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
                              (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
-                              SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+                              SCSrc_32:$soffset, offset:$offset, glc:$glc,
                               slc:$slc, tfe:$tfe),
-                             name#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#
-                             "$glc"#"$slc"#"$tfe", []>;
+                             name#" $vdata, $vaddr, $srsrc, $soffset offen"#
+                             "$offset$glc$slc$tfe", []>;
     } // end offen = 1, idxen = 0
 
     let offen = 0, idxen = 1 in {
       defm _IDXEN  : MUBUF_m <op, name#"_idxen", (outs),
                            (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
-                           SCSrc_32:$soffset, mbuf_offset:$offset, glc:$glc,
+                           SCSrc_32:$soffset, offset:$offset, glc:$glc,
                            slc:$slc, tfe:$tfe),
-                           name#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+                           name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>;
     }
 
     let offen = 1, idxen = 1 in {
       defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
                            (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
-                           mbuf_offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
-                           name#" $vdata, $vaddr, $srsrc, $soffset idxen offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
+                           offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
+                           name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>;
     }
 
     let offen = 0, idxen = 0 in {
       defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
                                     (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
                                          SCSrc_32:$soffset,
-                                         mbuf_offset:$offset, glc:$glc, slc:$slc,
+                                         offset:$offset, glc:$glc, slc:$slc,
                                          tfe:$tfe),
                                     name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
-                                         "$offset"#"$glc"#"$slc"#"$tfe",
+                                         "$offset$glc$slc$tfe",
                                     [(st store_vt:$vdata,
                                       (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
                                                    i32:$soffset, i16:$offset,
@@ -3338,8 +3083,8 @@
 multiclass FLAT_Load_Helper <flat op, string asm_name,
     RegisterClass regClass,
     dag outs = (outs regClass:$vdst),
-    dag ins = (ins VReg_64:$addr, glc_flat:$glc, slc_flat:$slc, tfe_flat:$tfe),
-    string asm = asm_name#" $vdst, $addr"#"$glc"#"$slc"#"$tfe"> {
+    dag ins = (ins VReg_64:$addr, glc:$glc, slc:$slc, tfe:$tfe),
+    string asm = asm_name#" $vdst, $addr$glc$slc$tfe"> {
 
   let data = 0, mayLoad = 1 in {
 
@@ -3354,9 +3099,9 @@
 multiclass FLAT_Store_Helper <flat op, string asm_name,
     RegisterClass vdataClass,
     dag outs = (outs),
-    dag ins = (ins VReg_64:$addr, vdataClass:$data, glc_flat:$glc,
-                   slc_flat:$slc, tfe_flat:$tfe),
-    string asm = asm_name#" $addr, $data"#"$glc"#"$slc"#"$tfe"> {
+    dag ins = (ins VReg_64:$addr, vdataClass:$data, glc:$glc,
+                   slc:$slc, tfe:$tfe),
+    string asm = asm_name#" $addr, $data$glc$slc$tfe"> {
 
   let mayLoad = 0, mayStore = 1, vdst = 0 in {
 
@@ -3376,25 +3121,25 @@
   let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0, AsmMatchConverter = "cvtFlatAtomic" in {
     def "" : FLAT_Pseudo <NAME, outs_noret,
                           (ins VReg_64:$addr, data_rc:$data,
-                               slc_flat_atomic:$slc, tfe_flat_atomic:$tfe), []>,
+                               slc:$slc, tfe:$tfe), []>,
              AtomicNoRet <NAME, 0>;
 
     def _ci : FLAT_Real_ci <op.CI, NAME, outs_noret,
                             (ins VReg_64:$addr, data_rc:$data,
-                                 slc_flat_atomic:$slc, tfe_flat_atomic:$tfe),
+                                 slc:$slc, tfe:$tfe),
                             asm_noret>;
 
     def _vi : FLAT_Real_vi <op.VI, NAME, outs_noret,
                             (ins VReg_64:$addr, data_rc:$data,
-                                 slc_flat_atomic:$slc, tfe_flat_atomic:$tfe),
+                                 slc:$slc, tfe:$tfe),
                             asm_noret>;
   }
 
   let glc = 1, hasPostISelHook = 1, AsmMatchConverter = "cvtFlatAtomic" in {
     defm _RTN : FLAT_AtomicRet_m <op, (outs vdst_rc:$vdst),
-                        (ins VReg_64:$addr, data_rc:$data, slc_flat_atomic:$slc,
-                             tfe_flat_atomic:$tfe),
-                        asm_name#" $vdst, $addr, $data glc"#"$slc"#"$tfe", []>;
+                        (ins VReg_64:$addr, data_rc:$data, slc:$slc,
+                             tfe:$tfe),
+                        asm_name#" $vdst, $addr, $data glc$slc$tfe", []>;
   }
 }
 
Index: test/CodeGen/AMDGPU/captured-frame-index.ll
===================================================================
--- test/CodeGen/AMDGPU/captured-frame-index.ll
+++ test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -123,10 +123,10 @@
 ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen
 
 ; GCN: v_mov_b32_e32 [[FI1:v[0-9]+]], 4{{$}}
-; GCN: buffer_store_dword [[FI1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[FI1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 
 ; GCN-DAG: v_mov_b32_e32 [[FI2:v[0-9]+]], 8{{$}}
-; GCN: buffer_store_dword [[FI2]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[FI2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @stored_fi_to_global_2_small_objects(float* addrspace(1)* %ptr) #0 {
   %tmp0 = alloca float
   %tmp1 = alloca float
@@ -150,7 +150,7 @@
 
 ; GCN-DAG: s_add_i32 [[BASE_1_OFF_1:s[0-9]+]], 0, 56
 ; GCN-DAG: v_mov_b32_e32 [[V_BASE_1_OFF_1:v[0-9]+]], [[BASE_1_OFF_1]]
-; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_store_dword [[V_BASE_1_OFF_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @stored_fi_to_global_huge_frame_offset(i32* addrspace(1)* %ptr) #0 {
   %tmp0 = alloca [4096 x i32]
   %tmp1 = alloca [4096 x i32]
Index: test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
===================================================================
--- test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -42,7 +42,7 @@
 ; OPT: br label
 
 ; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32:
-; CI: buffer_load_dword {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
+; CI: buffer_load_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
 define void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) {
 entry:
   %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999
Index: test/CodeGen/AMDGPU/cgp-addressing-modes.ll
===================================================================
--- test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -40,7 +40,7 @@
 
 ; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
 ; GCN: {{^}}BB1_2:
 ; GCN: s_or_b64 exec
 define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
@@ -67,7 +67,7 @@
 
 ; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
+; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
 ; GCN: {{^}}BB2_2:
 ; GCN: s_or_b64 exec
 define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
@@ -94,7 +94,7 @@
 
 ; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
 ; GCN: s_and_saveexec_b64
-; GCN: buffer_load_sbyte {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
+; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
 ; GCN: {{^}}BB3_2:
 ; GCN: s_or_b64 exec
 define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
Index: test/CodeGen/AMDGPU/ctpop.ll
===================================================================
--- test/CodeGen/AMDGPU/ctpop.ll
+++ test/CodeGen/AMDGPU/ctpop.ll
@@ -250,8 +250,8 @@
 }
 
 ; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
-; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
-; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; GCN-DAG: buffer_load_dword [[VAL:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], {{0$}}
+; GCN-DAG: buffer_load_dword [[VAR:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
 ; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 ; VI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
 ; GCN: buffer_store_dword [[RESULT]],
Index: test/CodeGen/AMDGPU/fdiv.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fdiv.f64.ll
+++ test/CodeGen/AMDGPU/fdiv.f64.ll
@@ -4,8 +4,8 @@
 
 
 ; COMMON-LABEL: {{^}}fdiv_f64:
-; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0
-; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; COMMON-DAG: buffer_load_dwordx2 [[NUM:v\[[0-9]+:[0-9]+\]]], off, {{s\[[0-9]+:[0-9]+\]}}, 0
+; COMMON-DAG: buffer_load_dwordx2 [[DEN:v\[[0-9]+:[0-9]+\]]], off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:8
 ; CI-DAG: v_div_scale_f64 [[SCALE0:v\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, [[DEN]], [[DEN]], [[NUM]]
 ; CI-DAG: v_div_scale_f64 [[SCALE1:v\[[0-9]+:[0-9]+\]]], vcc, [[NUM]], [[DEN]], [[NUM]]
 
Index: test/CodeGen/AMDGPU/fmax3.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fmax3.f64.ll
+++ test/CodeGen/AMDGPU/fmax3.f64.ll
@@ -4,9 +4,9 @@
 declare double @llvm.maxnum.f64(double, double) nounwind readnone
 
 ; SI-LABEL: {{^}}test_fmax3_f64:
-; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
-; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
-; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; SI-DAG: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; SI-DAG: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; SI-DAG: buffer_load_dwordx2 [[REGC:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:16
 ; SI: v_max_f64 [[REGA]], [[REGA]], [[REGB]]
 ; SI: v_max_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[REGA]], [[REGC]]
 ; SI: buffer_store_dwordx2 [[RESULT]],
Index: test/CodeGen/AMDGPU/global_atomics.ll
===================================================================
--- test/CodeGen/AMDGPU/global_atomics.ll
+++ test/CodeGen/AMDGPU/global_atomics.ll
@@ -3,7 +3,7 @@
 
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_offset:
-; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -12,7 +12,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
-; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -48,7 +48,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32:
-; GCN: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
@@ -56,7 +56,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_add_i32_ret:
-; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -88,7 +88,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_offset:
-; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -97,7 +97,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
-; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -132,7 +132,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32:
-; GCN: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
@@ -140,7 +140,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_and_i32_ret:
-; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -172,7 +172,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
-; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -181,7 +181,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
-; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -216,7 +216,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32:
-; GCN: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
@@ -224,7 +224,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
-; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -256,7 +256,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_offset:
-; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -265,7 +265,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
-; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -300,7 +300,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32:
-; GCN: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
@@ -308,7 +308,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_max_i32_ret:
-; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -340,7 +340,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
-; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -349,7 +349,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
-; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -384,7 +384,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32:
-; GCN: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
@@ -392,7 +392,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
-; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -424,7 +424,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_offset:
-; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -433,7 +433,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
-; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -468,7 +468,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32:
-; GCN: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
@@ -476,7 +476,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_min_i32_ret:
-; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -508,7 +508,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
-; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -517,7 +517,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
-; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -552,7 +552,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32:
-; GCN: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
@@ -560,7 +560,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
-; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -592,7 +592,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_offset:
-; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -601,7 +601,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
-; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -636,7 +636,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32:
-; GCN: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
@@ -644,7 +644,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_or_i32_ret:
-; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -676,7 +676,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
-; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -685,7 +685,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
-; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -719,7 +719,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32:
-; GCN: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
@@ -727,7 +727,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
-; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -761,7 +761,7 @@
 ; CMP_SWAP
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset:
-; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -770,7 +770,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
-; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword v[[RET]]
 define void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
 entry:
@@ -806,7 +806,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32:
-; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
 entry:
   %0  = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
@@ -814,7 +814,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_ret:
-; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_cmpswap v{{\[}}[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword v[[RET]]
 define void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
 entry:
@@ -848,7 +848,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
-; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
 define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 4
@@ -857,7 +857,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
-; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -892,7 +892,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32:
-; GCN: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0  = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
@@ -900,7 +900,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
-; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
 entry:
@@ -933,7 +933,7 @@
 
 ; ATOMIC_LOAD
 ; FUNC-LABEL: {{^}}atomic_load_i32_offset:
-; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
@@ -945,7 +945,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i32:
-; SI: buffer_load_dword [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
 ; GCN: buffer_store_dword [[RET]]
 define void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
@@ -981,7 +981,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i64_offset:
-; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_load_i64_offset(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
@@ -993,7 +993,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_load_i64:
-; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; VI: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_load_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %out) {
@@ -1030,7 +1030,7 @@
 
 ; ATOMIC_STORE
 ; FUNC-LABEL: {{^}}atomic_store_i32_offset:
-; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
 define void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
 entry:
@@ -1040,7 +1040,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i32:
-; SI: buffer_store_dword {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
+; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc{{$}}
 ; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+}} glc{{$}}
 define void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
 entry:
@@ -1070,7 +1070,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64_offset:
-; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; SI: buffer_store_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; VI: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
 define void @atomic_store_i64_offset(i64 %in, i64 addrspace(1)* %out) {
 entry:
@@ -1080,7 +1080,7 @@
 }
 
 ; FUNC-LABEL: {{^}}atomic_store_i64:
-; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; VI: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}] glc
 define void @atomic_store_i64(i64 %in, i64 addrspace(1)* %out) {
 entry:
Index: test/CodeGen/AMDGPU/global_atomics_i64.ll
===================================================================
--- test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -3,7 +3,7 @@
 
 
 ; GCN-LABEL: {{^}}atomic_add_i64_offset:
-; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_add_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -12,7 +12,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
-; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_add_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -47,7 +47,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_add_i64:
-; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_add_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_add_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile add i64 addrspace(1)* %out, i64 %in seq_cst
@@ -55,7 +55,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_add_i64_ret:
-; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_add_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -87,7 +87,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64_offset:
-; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_and_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -96,7 +96,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
-; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_and_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -131,7 +131,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64:
-; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_and_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_and_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile and i64 addrspace(1)* %out, i64 %in seq_cst
@@ -139,7 +139,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_and_i64_ret:
-; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_and_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -171,7 +171,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64_offset:
-; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_sub_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -180,7 +180,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
-; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_sub_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -215,7 +215,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64:
-; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_sub_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_sub_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile sub i64 addrspace(1)* %out, i64 %in seq_cst
@@ -223,7 +223,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_sub_i64_ret:
-; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_sub_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -255,7 +255,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64_offset:
-; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_max_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -264,7 +264,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
-; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_max_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -299,7 +299,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64:
-; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_max_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile max i64 addrspace(1)* %out, i64 %in seq_cst
@@ -307,7 +307,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_max_i64_ret:
-; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_max_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -339,7 +339,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64_offset:
-; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_umax_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -348,7 +348,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
-; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umax_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -383,7 +383,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64:
-; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umax_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umax_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile umax i64 addrspace(1)* %out, i64 %in seq_cst
@@ -391,7 +391,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umax_i64_ret:
-; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umax_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -423,7 +423,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64_offset:
-; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_min_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -432,7 +432,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
-; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_min_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -467,7 +467,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64:
-; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_smin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_min_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile min i64 addrspace(1)* %out, i64 %in seq_cst
@@ -475,7 +475,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_min_i64_ret:
-; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_min_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -507,7 +507,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64_offset:
-; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_umin_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -516,7 +516,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
-; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umin_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -551,7 +551,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64:
-; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_umin_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_umin_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile umin i64 addrspace(1)* %out, i64 %in seq_cst
@@ -559,7 +559,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_umin_i64_ret:
-; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; CI: buffer_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_umin_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -591,7 +591,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64_offset:
-; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_or_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -600,7 +600,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
-; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_or_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -635,7 +635,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64:
-; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_or_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_or_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile or i64 addrspace(1)* %out, i64 %in seq_cst
@@ -643,7 +643,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_or_i64_ret:
-; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_or_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -675,7 +675,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
-; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_xchg_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -684,7 +684,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
-; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xchg_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -718,7 +718,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64:
-; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xchg_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile xchg i64 addrspace(1)* %out, i64 %in seq_cst
@@ -726,7 +726,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
-; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xchg_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -758,7 +758,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64_offset:
-; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
 define void @atomic_xor_i64_offset(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %gep = getelementptr i64, i64 addrspace(1)* %out, i64 4
@@ -767,7 +767,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
-; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xor_i64_ret_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
@@ -802,7 +802,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64:
-; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+; GCN: buffer_atomic_xor_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
 define void @atomic_xor_i64(i64 addrspace(1)* %out, i64 %in) {
 entry:
   %tmp0  = atomicrmw volatile xor i64 addrspace(1)* %out, i64 %in seq_cst
@@ -810,7 +810,7 @@
 }
 
 ; GCN-LABEL: {{^}}atomic_xor_i64_ret:
-; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; GCN: buffer_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
 ; GCN: buffer_store_dwordx2 [[RET]]
 define void @atomic_xor_i64_ret(i64 addrspace(1)* %out, i64 addrspace(1)* %out2, i64 %in) {
 entry:
Index: test/CodeGen/AMDGPU/half.ll
===================================================================
--- test/CodeGen/AMDGPU/half.ll
+++ test/CodeGen/AMDGPU/half.ll
@@ -13,10 +13,10 @@
 }
 
 ; GCN-LABEL: {{^}}load_v2f16_arg:
-; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
-; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
+; GCN-DAG: buffer_store_short [[V0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_short [[V1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
 ; GCN: s_endpgm
 define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
   store <2 x half> %arg, <2 x half> addrspace(1)* %out
@@ -280,8 +280,8 @@
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
-; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
@@ -378,8 +378,8 @@
 }
 
 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
-; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
@@ -407,8 +407,8 @@
 ; GCN: v_cvt_f64_f32_e32
 ; GCN-NOT: v_cvt_f64_f32_e32
 
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
 ; GCN: s_endpgm
 define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
   %val = load <3 x half>, <3 x half> addrspace(1)* %in
Index: test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
===================================================================
--- test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
+++ test/CodeGen/AMDGPU/invariant-load-no-alias-store.ll
@@ -21,7 +21,7 @@
 ; GCN-LABEL: {{^}}test_merge_store_constant_i16_invariant_constant_pointer_load:
 ; GCN: s_load_dwordx2 s{{\[}}[[SPTR_LO:[0-9]+]]:[[SPTR_HI:[0-9]+]]{{\]}}
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x1c8007b
-; GCN: buffer_store_dword [[K]], s{{\[}}[[SPTR_LO]]:
+; GCN: buffer_store_dword [[K]], off, s{{\[}}[[SPTR_LO]]:
 define void @test_merge_store_constant_i16_invariant_constant_pointer_load(i16 addrspace(1)* addrspace(2)* dereferenceable(4096) nonnull %in) #0 {
   %ptr = load i16 addrspace(1)*, i16 addrspace(1)* addrspace(2)* %in, !invariant.load !0
   %ptr.1 = getelementptr i16, i16 addrspace(1)* %ptr, i64 1
@@ -32,4 +32,4 @@
 
 !0 = !{}
 
-attributes #0 = { nounwind }
\ No newline at end of file
+attributes #0 = { nounwind }
Index: test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
+++ test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
@@ -7,7 +7,7 @@
 ; FIXME: Out of bounds immediate offset crashes
 
 ; CHECK-LABEL: {{^}}main:
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc
+; CHECK: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc
 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc
 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc
 ; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc
Index: test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -49,7 +49,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   store i32 %result, i32 addrspace(1)* %out
@@ -58,7 +58,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
 define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -67,7 +67,7 @@
 }
 
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32:
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   ret void
@@ -75,7 +75,7 @@
 
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_dec [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -166,7 +166,7 @@
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   store i64 %result, i64 addrspace(1)* %out
@@ -176,7 +176,7 @@
 ; GCN-LABEL: {{^}}global_atomic_dec_ret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
 define void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_dec_noret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
+; GCN: buffer_atomic_dec_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
 define void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
Index: test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
@@ -49,7 +49,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   store i32 %result, i32 addrspace(1)* %out
@@ -58,7 +58,7 @@
 
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
 define void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -67,7 +67,7 @@
 }
 
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32:
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42)
   ret void
@@ -75,7 +75,7 @@
 
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i32_offset:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
-; GCN: buffer_atomic_inc [[K]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 define void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
   %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42)
@@ -166,7 +166,7 @@
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 define void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   store i64 %result, i64 addrspace(1)* %out
@@ -176,7 +176,7 @@
 ; GCN-LABEL: {{^}}global_atomic_inc_ret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
 define void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i64:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42)
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}global_atomic_inc_noret_i64_offset:
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
-; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
+; GCN: buffer_atomic_inc_x2 v{{\[}}[[KLO]]:[[KHI]]{{\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
 define void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
   %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
   %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42)
Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -2,7 +2,7 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}test1:
-;CHECK: buffer_atomic_swap v0, s[0:3], 0 glc
+;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 idxen glc
 ;CHECK: s_waitcnt vmcnt(0)
@@ -13,9 +13,9 @@
 ;CHECK: buffer_atomic_swap v0, v2, s[0:3], 0 offen offset:42 glc
 ;CHECK-DAG: s_waitcnt vmcnt(0)
 ;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
-;CHECK: buffer_atomic_swap v0, s[0:3], [[SOFS]] offset:1 glc
+;CHECK: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:1 glc
 ;CHECK: s_waitcnt vmcnt(0)
-;CHECK: buffer_atomic_swap v0, s[0:3], 0{{$}}
+;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
 define amdgpu_ps float @test1(<4 x i32> inreg %rsrc, i32 %data, i32 %vindex, i32 %voffset) {
 main_body:
   %o1 = call i32 @llvm.amdgcn.buffer.atomic.swap(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
@@ -68,7 +68,7 @@
 ; create copies which we don't bother to track here.
 ;
 ;CHECK-LABEL: {{^}}test3:
-;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 glc
+;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 idxen glc
 ;CHECK: s_waitcnt vmcnt(0)
@@ -79,7 +79,7 @@
 ;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v3, s[0:3], 0 offen offset:42 glc
 ;CHECK-DAG: s_waitcnt vmcnt(0)
 ;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1fff
-;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[SOFS]] offset:1 glc
+;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:1 glc
 define amdgpu_ps float @test3(<4 x i32> inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
 main_body:
   %o1 = call i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32 %data, i32 %cmp, <4 x i32> %rsrc, i32 0, i32 0, i1 0)
Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_load:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0
-;CHECK: buffer_load_format_xyzw v[4:7], s[0:3], 0 glc
-;CHECK: buffer_load_format_xyzw v[8:11], s[0:3], 0 slc
+;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0
+;CHECK: buffer_load_format_xyzw v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_load_format_xyzw v[8:11], off, s[0:3], 0 slc
 ;CHECK: s_waitcnt
 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
 main_body:
@@ -18,7 +18,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_format_xyzw v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_load_format_xyzw v[0:3], off, s[0:3], 0 offset:42
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
 main_body:
@@ -27,11 +27,11 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
-;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 61 offset:4095
+;CHECK-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 61 offset:4095
 ;CHECK-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS1]] offset:4093
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093
 ;CHECK: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS2]] offset:1
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
 main_body:
@@ -45,9 +45,9 @@
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_reuse:
 ;CHECK: s_movk_i32 [[OFS:s[0-9]+]], 0xfff
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:65
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:65
 ;CHECK-NOT: s_mov
-;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, s[0:3], [[OFS]] offset:81
+;CHECK: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS]] offset:81
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_reuse(<4 x i32> inreg) {
 main_body:
@@ -105,7 +105,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_x:
-;CHECK: buffer_load_format_x v0, s[0:3], 0
+;CHECK: buffer_load_format_x v0, off, s[0:3], 0
 ;CHECK: s_waitcnt
 define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
 main_body:
@@ -114,7 +114,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_xy:
-;CHECK: buffer_load_format_xy v[0:1], s[0:3], 0
+;CHECK: buffer_load_format_xy v[0:1], off, s[0:3], 0
 ;CHECK: s_waitcnt
 define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
 main_body:
Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_load:
-;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], 0
-;CHECK: buffer_load_dwordx4 v[4:7], s[0:3], 0 glc
-;CHECK: buffer_load_dwordx4 v[8:11], s[0:3], 0 slc
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+;CHECK: buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
 ;CHECK: s_waitcnt
 define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
 main_body:
@@ -18,7 +18,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs:
-;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:42
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
 main_body:
@@ -28,7 +28,7 @@
 
 ;CHECK-LABEL: {{^}}buffer_load_immoffs_large:
 ;CHECK: s_movk_i32 [[OFFSET:s[0-9]+]], 0x1fff
-;CHECK: buffer_load_dwordx4 v[0:3], s[0:3], [[OFFSET]] offset:1
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], [[OFFSET]] offset:1
 ;CHECK: s_waitcnt
 define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
 main_body:
Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_store:
-;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0
-;CHECK: buffer_store_format_xyzw v[4:7], s[0:3], 0 glc
-;CHECK: buffer_store_format_xyzw v[8:11], s[0:3], 0 slc
+;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0
+;CHECK: buffer_store_format_xyzw v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_store_format_xyzw v[8:11], off, s[0:3], 0 slc
 define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
@@ -14,7 +14,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_store_immoffs:
-;CHECK: buffer_store_format_xyzw v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_store_format_xyzw v[0:3], off, s[0:3], 0 offset:42
 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
@@ -2,9 +2,9 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
 
 ;CHECK-LABEL: {{^}}buffer_store:
-;CHECK: buffer_store_dwordx4 v[0:3], s[0:3], 0
-;CHECK: buffer_store_dwordx4 v[4:7], s[0:3], 0 glc
-;CHECK: buffer_store_dwordx4 v[8:11], s[0:3], 0 slc
+;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+;CHECK: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
+;CHECK: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
 define amdgpu_ps void @buffer_store(<4 x i32> inreg, <4 x float>, <4 x float>, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 0, i1 0, i1 0)
@@ -14,7 +14,7 @@
 }
 
 ;CHECK-LABEL: {{^}}buffer_store_immoffs:
-;CHECK: buffer_store_dwordx4 v[0:3], s[0:3], 0 offset:42
+;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
 define amdgpu_ps void @buffer_store_immoffs(<4 x i32> inreg, <4 x float>) {
 main_body:
   call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %1, <4 x i32> %0, i32 0, i32 42, i1 0, i1 0)
Index: test/CodeGen/AMDGPU/merge-stores.ll
===================================================================
--- test/CodeGen/AMDGPU/merge-stores.ll
+++ test/CodeGen/AMDGPU/merge-stores.ll
@@ -231,8 +231,8 @@
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_2_adjacent_loads_i32_nonzero_base:
-; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-; GCN: buffer_store_dwordx2 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN: buffer_store_dwordx2 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
 define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
   %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3
@@ -334,8 +334,8 @@
 }
 
 ; GCN-LABEL: {{^}}merge_global_store_4_adjacent_loads_i32_nonzero_base:
-; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
-; GCN: buffer_store_dwordx4 [[LOAD]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
+; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; GCN: buffer_store_dwordx4 [[LOAD]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28
 define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
   %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
@@ -640,13 +640,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3i32_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
 
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 {
@@ -657,13 +657,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3i64_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 {
   %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
@@ -673,13 +673,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3f32_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 {
   %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
@@ -690,13 +690,13 @@
 
 ; GCN-LABEL: {{^}}copy_v3f64_align4:
 ; GCN-NOT: SCRATCH_RSRC_DWORD
-; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN-NOT: offen
 ; GCN: s_waitcnt vmcnt
 ; GCN-NOT: offen
-; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
+; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GCN: ScratchSize: 0{{$}}
 define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 {
   %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
Index: test/CodeGen/AMDGPU/mubuf.ll
===================================================================
--- test/CodeGen/AMDGPU/mubuf.ll
+++ test/CodeGen/AMDGPU/mubuf.ll
@@ -8,7 +8,7 @@
 
 ; MUBUF load with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_load0:
-; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
+; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
@@ -19,7 +19,7 @@
 
 ; MUBUF load with the largest possible immediate offset
 ; CHECK-LABEL: {{^}}mubuf_load1:
-; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
+; CHECK: buffer_load_ubyte v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
@@ -31,7 +31,7 @@
 ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_load2:
 ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
-; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
+; CHECK: buffer_load_dword v{{[0-9]}}, off, s[{{[0-9]+:[0-9]+}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x30,0xe0
 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
@@ -92,7 +92,7 @@
 
 ; MUBUF store with an immediate byte offset that fits into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_store0:
-; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
+; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
 define void @mubuf_store0(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1
@@ -102,7 +102,7 @@
 
 ; MUBUF store with the largest possible immediate offset
 ; CHECK-LABEL: {{^}}mubuf_store1:
-; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
+; CHECK: buffer_store_byte v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
 
 define void @mubuf_store1(i8 addrspace(1)* %out) {
 entry:
@@ -114,7 +114,7 @@
 ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
 ; CHECK-LABEL: {{^}}mubuf_store2:
 ; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x1000
-; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
+; CHECK: buffer_store_dword v{{[0-9]}}, off, s[{{[0-9]:[0-9]}}], [[SOFFSET]] ; encoding: [0x00,0x00,0x70,0xe0
 define void @mubuf_store2(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %out, i64 1024
@@ -135,14 +135,14 @@
 }
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr:
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
+; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0
 define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
   store i32 99, i32 addrspace(1)* %out, align 4
   ret void
 }
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
+; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
 define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 10
   store i32 99, i32 addrspace(1)* %out.gep, align 4
@@ -151,7 +151,7 @@
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
 ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
-; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
+; CHECK: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
 define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
   store i32 99, i32 addrspace(1)* %out.gep, align 4
@@ -160,7 +160,7 @@
 
 ; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset_atomic:
 ; CHECK: s_mov_b32 [[SOFFSET:s[0-9]+]], 0x20000
-; CHECK: buffer_atomic_add v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
+; CHECK: buffer_atomic_add v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, [[SOFFSET]]
 define void @store_sgpr_ptr_large_offset_atomic(i32 addrspace(1)* %out) #0 {
   %gep = getelementptr i32, i32 addrspace(1)* %out, i32 32768
   %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 5 seq_cst
Index: test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
===================================================================
--- test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
+++ test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
@@ -25,7 +25,7 @@
 }
 
 ; GCN-LABEL: {{^}}reduce_i64_align_4_bitcast_v2i32_elt1:
-; GCN: buffer_load_dword [[VAL:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
+; GCN: buffer_load_dword [[VAL:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
 ; GCN: buffer_store_dword [[VAL]]
 define void @reduce_i64_align_4_bitcast_v2i32_elt1(i32 addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
   %a = load i64, i64 addrspace(1)* %in, align 4
Index: test/CodeGen/AMDGPU/schedule-global-loads.ll
===================================================================
--- test/CodeGen/AMDGPU/schedule-global-loads.ll
+++ test/CodeGen/AMDGPU/schedule-global-loads.ll
@@ -7,8 +7,8 @@
 ; ordering the loads so that the lower address loads come first.
 
 ; FUNC-LABEL: {{^}}cluster_global_arg_loads:
-; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
-; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
+; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
 ; SI: buffer_store_dword [[REG0]]
 ; SI: buffer_store_dword [[REG1]]
 define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
Index: test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
===================================================================
--- test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -184,12 +184,12 @@
 }
 
 ; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
-; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
 ; CI: buffer_store_dword
 ; CI: s_endpgm
 define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
Index: test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
===================================================================
--- test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -18,7 +18,7 @@
 ; CHECK: s_or_b64 exec, exec, s[2:3]
 ; CHECK-NEXT: s_mov_b32 s7, 0xf000
 ; CHECK-NEXT: s_mov_b32 s6, -1
-; CHECK-NEXT: buffer_store_dword v1, s[4:7], 0
+; CHECK-NEXT: buffer_store_dword v1, off, s[4:7], 0
 ; CHECK-NEXT: s_endpgm
 define void @foobar(float %a0, float %a1, float addrspace(1)* %out) nounwind {
 entry:
Index: test/CodeGen/AMDGPU/v_mac.ll
===================================================================
--- test/CodeGen/AMDGPU/v_mac.ll
+++ test/CodeGen/AMDGPU/v_mac.ll
@@ -2,9 +2,9 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}mac_vvv:
-; GCN: buffer_load_dword [[A:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0{{$}}
-; GCN: buffer_load_dword [[B:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:4
-; GCN: buffer_load_dword [[C:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:8
+; GCN: buffer_load_dword [[A:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0{{$}}
+; GCN: buffer_load_dword [[B:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4
+; GCN: buffer_load_dword [[C:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
 ; GCN: v_mac_f32_e32 [[C]], [[B]], [[A]]
 ; GCN: buffer_store_dword [[C]]
 define void @mac_vvv(float addrspace(1)* %out, float addrspace(1)* %in) {
Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
===================================================================
--- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -27,7 +27,7 @@
 ; VIMESA-NEXT: s_mov_b32 s15, 0x980000
 
 
-; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
 
 ; GCN: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
 ; GCN: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
Index: test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
===================================================================
--- test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -19,8 +19,8 @@
 ; VI-NEXT: s_mov_b32 s15, 0x980000
 
 ; s12 is offset user SGPR
-; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill
-; GCN: buffer_load_dword v{{[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload
+; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Spill
+; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], s11 offset:{{[0-9]+}} ; 16-byte Folded Reload
 
 ; GCN: NumVgprs: 256
 ; GCN: ScratchSize: 1024
Index: test/MC/AMDGPU/ds-err.s
===================================================================
--- test/MC/AMDGPU/ds-err.s
+++ test/MC/AMDGPU/ds-err.s
@@ -2,15 +2,15 @@
 // RUN: not llvm-mc -arch=amdgcn -mcpu=SI %s 2>&1 | FileCheck %s
 
 // offset too big
-// CHECK: invalid operand for instruction
+// CHECK: error: invalid operand for instruction
 ds_add_u32 v2, v4 offset:1000000000
 
 // offset0 twice
-// CHECK:  error: not a valid operand.
+// CHECK:  error: invalid operand for instruction
 ds_write2_b32 v2, v4, v6 offset0:4 offset0:8
 
 // offset1 twice
-// CHECK:  error: not a valid operand.
+// CHECK:  error: invalid operand for instruction
 ds_write2_b32 v2, v4, v6 offset1:4 offset1:8
 
 // offset0 too big
Index: test/MC/AMDGPU/mubuf.s
===================================================================
--- test/MC/AMDGPU/mubuf.s
+++ test/MC/AMDGPU/mubuf.s
@@ -14,33 +14,33 @@
 // load - immediate offset only
 //===----------------------------------------------------------------------===//
 
-buffer_load_dword v1, s[4:7], s1
-// SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dword v1, off, s[4:7], s1
+// SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_dword v1, s[4:7], s1 offset:4
-// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dword v1, off, s[4:7], s1 offset:4
+// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_dword v1, s[4:7], s1 offset:4 glc
-// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dword v1, off, s[4:7], s1 offset:4 glc
+// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_dword v1, s[4:7], s1 offset:4 slc
-// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dword v1, off, s[4:7], s1 offset:4 slc
+// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x41,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_dword v1, s[4:7], s1 offset:4 tfe
-// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01]
+buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe
+// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x30,0xe0,0x00,0x01,0x81,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01]
 
-buffer_load_dword v1, s[4:7], s1 glc tfe
-// SICI: buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01]
+buffer_load_dword v1, off, s[4:7], s1 glc tfe
+// SICI: buffer_load_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x30,0xe0,0x00,0x01,0x81,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01]
 
-buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe
-// SICI: buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]
+buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe
+// SICI: buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x30,0xe0,0x00,0x01,0xc1,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]
 
 
 //===----------------------------------------------------------------------===//
@@ -175,33 +175,33 @@
 // store - immediate offset only
 //===----------------------------------------------------------------------===//
 
-buffer_store_dword v1, s[4:7], s1
-// SICI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_dword v1, off, s[4:7], s1
+// SICI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_dword v1, s[4:7], s1 offset:4
-// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_dword v1, off, s[4:7], s1 offset:4
+// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_dword v1, s[4:7], s1 offset:4 glc
-// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_dword v1, off, s[4:7], s1 offset:4 glc
+// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_dword v1, s[4:7], s1 offset:4 slc
-// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_dword v1, off, s[4:7], s1 offset:4 slc
+// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x41,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_dword v1, s[4:7], s1 offset:4 tfe
-// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
+buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe
+// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
 
-buffer_store_dword v1, s[4:7], s1 glc tfe
-// SICI: buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
+buffer_store_dword v1, off, s[4:7], s1 glc tfe
+// SICI: buffer_store_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
 
-buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe
-// SICI: buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]
+buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe
+// SICI: buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0xc1,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]
 
 //===----------------------------------------------------------------------===//
 // store - vgpr offset
@@ -335,85 +335,85 @@
 // Instructions
 //===----------------------------------------------------------------------===//
 
-buffer_load_format_x v1, s[4:7], s1
-// SICI: buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_format_x v1, off, s[4:7], s1
+// SICI: buffer_load_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_format_xy v[1:2], s[4:7], s1
-// SICI: buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_format_xy v[1:2], off, s[4:7], s1
+// SICI: buffer_load_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_format_xyz v[1:3], s[4:7], s1
-// SICI: buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_format_xyz v[1:3], off, s[4:7], s1
+// SICI: buffer_load_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_format_xyzw v[1:4], s[4:7], s1
-// SICI: buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_format_xyzw v[1:4], off, s[4:7], s1
+// SICI: buffer_load_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_format_x v1, s[4:7], s1
-// SICI: buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_format_x v1, off, s[4:7], s1
+// SICI: buffer_store_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_format_xy v[1:2], s[4:7], s1
-// SICI: buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_format_xy v[1:2], off, s[4:7], s1
+// SICI: buffer_store_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_format_xyz v[1:3], s[4:7], s1
-// SICI: buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_format_xyz v[1:3], off, s[4:7], s1
+// SICI: buffer_store_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_format_xyzw v[1:4], s[4:7], s1
-// SICI: buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_format_xyzw v[1:4], off, s[4:7], s1
+// SICI: buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_ubyte v1, s[4:7], s1
-// SICI: buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_ubyte v1, off, s[4:7], s1
+// SICI: buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x20,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_sbyte v1, s[4:7], s1
-// SICI: buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_sbyte v1, off, s[4:7], s1
+// SICI: buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x24,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_ushort v1, s[4:7], s1
-// SICI: buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_ushort v1, off, s[4:7], s1
+// SICI: buffer_load_ushort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_ushort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_sshort v1, s[4:7], s1
-// SICI: buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_sshort v1, off, s[4:7], s1
+// SICI: buffer_load_sshort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x2c,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_sshort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_dword v1, s[4:7], s1
-// SICI: buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dword v1, off, s[4:7], s1
+// SICI: buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x30,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_dwordx2 v[1:2], s[4:7], s1
-// SICI: buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dwordx2 v[1:2], off, s[4:7], s1
+// SICI: buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x34,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_load_dwordx4 v[1:4], s[4:7], s1
-// SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dwordx4 v[1:4], off, s[4:7], s1
+// SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_byte v1, s[4:7], s1
-// SICI: buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_byte v1, off, s[4:7], s1
+// SICI: buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_short v1, s[4:7], s1
-// SICI: buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_short v1, off, s[4:7], s1
+// SICI: buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_dword v1 s[4:7], s1
-// SICI: buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_dword v1, off, s[4:7], s1
+// SICI: buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_dwordx2 v[1:2], s[4:7], s1
-// SICI: buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_dwordx2 v[1:2], off, s[4:7], s1
+// SICI: buffer_store_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
 
-buffer_store_dwordx4 v[1:4], s[4:7], s1
-// SICI: buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]
+buffer_store_dwordx4 v[1:4], off, s[4:7], s1
+// SICI: buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x78,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]
 
 //===----------------------------------------------------------------------===//
 // Cache invalidation
Index: test/MC/AMDGPU/reg-syntax-extra.s
===================================================================
--- test/MC/AMDGPU/reg-syntax-extra.s
+++ test/MC/AMDGPU/reg-syntax-extra.s
@@ -50,6 +50,6 @@
 // SICI: v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x5f,0x02,0x7e]
 // VI:   v_rcp_f64_e32 v[1:2], v[2:3] ; encoding: [0x02,0x4b,0x02,0x7e]
 
-buffer_load_dwordx4 [v1,v2,v3,v4], [s4,s5,s6,s7], s1
-// SICI: buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
-// VI:   buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
+buffer_load_dwordx4 [v1,v2,v3,v4], off, [s4,s5,s6,s7], s1
+// SICI: buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x38,0xe0,0x00,0x01,0x01,0x01]
+// VI:   buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
Index: test/MC/Disassembler/AMDGPU/mubuf_vi.txt
===================================================================
--- test/MC/Disassembler/AMDGPU/mubuf_vi.txt
+++ test/MC/Disassembler/AMDGPU/mubuf_vi.txt
@@ -1,24 +1,24 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=tonga -disassemble -show-encoding < %s | FileCheck %s -check-prefix=VI
 
-# VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x50 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 0x04 0x00 0x50 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x50,0xe0,0x00,0x01,0x01,0x01]
 0x04 0x40 0x50 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x52,0xe0,0x00,0x01,0x01,0x01]
 0x04 0x00 0x52 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x50,0xe0,0x00,0x01,0x81,0x01]
 0x04 0x00 0x50 0xe0 0x00 0x01 0x81 0x01
 
-# VI:   buffer_load_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x50,0xe0,0x00,0x01,0x81,0x01]
 0x00 0x40 0x50 0xe0 0x00 0x01 0x81 0x01
 
-# VI:   buffer_load_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x52,0xe0,0x00,0x01,0x81,0x01]
 0x04 0x40 0x52 0xe0 0x00 0x01 0x81 0x01
 
 # VI:   buffer_load_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x50,0xe0,0x02,0x01,0x01,0x01]
@@ -84,25 +84,25 @@
 # VI:   buffer_load_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x52,0xe0,0x02,0x01,0x81,0x01]
 0x04 0x70 0x52 0xe0 0x02 0x01 0x81 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x70 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
 0x04 0x00 0x70 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 glc ; encoding: [0x04,0x40,0x70,0xe0,0x00,0x01,0x01,0x01]
 0x04 0x40 0x70 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 slc ; encoding: [0x04,0x00,0x72,0xe0,0x00,0x01,0x01,0x01]
 0x04 0x00 0x72 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 tfe ; encoding: [0x04,0x00,0x70,0xe0,0x00,0x01,0x81,0x01]
 0x04 0x00 0x70 0xe0 0x00 0x01 0x81 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 glc tfe ; encoding: [0x00,0x40,0x70,0xe0,0x00,0x01,0x81,0x01]
 0x00 0x40 0x70 0xe0 0x00 0x01 0x81 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 offset:4 glc slc tfe ; encoding: [0x04,0x40,0x72,0xe0,0x00,0x01,0x81,0x01]
 0x04 0x40 0x72 0xe0 0x00 0x01 0x81 0x01
 
 # VI:   buffer_store_dword v1, v2, s[4:7], s1 offen ; encoding: [0x00,0x10,0x70,0xe0,0x02,0x01,0x01,0x01]
@@ -168,64 +168,64 @@
 # VI:   buffer_store_dword v1, v[2:3], s[4:7], s1 idxen offen offset:4 glc slc tfe ; encoding: [0x04,0x70,0x72,0xe0,0x02,0x01,0x81,0x01]
 0x04 0x70 0x72 0xe0 0x02 0x01 0x81 0x01
 
-# VI:   buffer_load_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x00,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x00 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x04,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x04 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x08,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x08 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x0c,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x0c 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_format_x v1, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_format_x v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x10,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x10 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_format_xy v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_format_xy v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x14,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x14 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_format_xyz v[1:3], s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_format_xyz v[1:3], off, s[4:7], s1 ; encoding: [0x00,0x00,0x18,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x18 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_format_xyzw v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_format_xyzw v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x1c,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x1c 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_ubyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_ubyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x40,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x40 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_sbyte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_sbyte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x44,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x44 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_ushort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_ushort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x48 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_sshort v1, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_sshort v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x4c,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x4c 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x50,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x50 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x54,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x54 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_load_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_load_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x5c,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x5c 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_byte v1, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_byte v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x60,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x60 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_short v1, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_short v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x68 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_dword v1, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_dword v1, off, s[4:7], s1 ; encoding: [0x00,0x00,0x70,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x70 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_dwordx2 v[1:2], s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_dwordx2 v[1:2], off, s[4:7], s1 ; encoding: [0x00,0x00,0x74,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x74 0xe0 0x00 0x01 0x01 0x01
 
-# VI:   buffer_store_dwordx4 v[1:4], s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]
+# VI:   buffer_store_dwordx4 v[1:4], off, s[4:7], s1 ; encoding: [0x00,0x00,0x7c,0xe0,0x00,0x01,0x01,0x01]
 0x00 0x00 0x7c 0xe0 0x00 0x01 0x01 0x01
 
 # VI:   buffer_wbinvl1   ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00]