Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -639,6 +639,10 @@
     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
   }
 
+  bool isVISrc_256F32() const {
+    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
+  }
+
   bool isVISrc_256B64() const {
     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
   }
Index: llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
===================================================================
--- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -177,6 +177,7 @@
   MCOperand decodeOperand_VSrcV216(unsigned Val) const;
   MCOperand decodeOperand_VSrcV232(unsigned Val) const;
 
+  MCOperand decodeOperand_VReg_32(unsigned Val) const;
   MCOperand decodeOperand_VReg_64(unsigned Val) const;
   MCOperand decodeOperand_VReg_96(unsigned Val) const;
   MCOperand decodeOperand_VReg_128(unsigned Val) const;
@@ -250,6 +251,9 @@
 
   MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
                         bool MandatoryLiteral = false) const;
+  MCOperand decodeVSrcOp(const OpWidthTy Width, unsigned Val) const;
+  MCOperand decodeVISrcOp(const OpWidthTy Width, unsigned Val,
+                          unsigned TypeSize) const;
   MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
   MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
   MCOperand decodeSpecialReg32(unsigned Val) const;
Index: llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -121,7 +121,6 @@
 DECODE_OPERAND_REG(VGPR_32)
 DECODE_OPERAND_REG(VGPR_32_Lo128)
 DECODE_OPERAND_REG(VRegOrLds_32)
-DECODE_OPERAND_REG(VS_32)
 DECODE_OPERAND_REG(VS_64)
 DECODE_OPERAND_REG(VS_128)
 
@@ -227,39 +226,80 @@
   return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
 }
 
+static DecodeStatus decodeOperand_VReg_32(MCInst &Inst, unsigned Imm,
+                                          uint64_t Addr,
+                                          const MCDisassembler *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+  return addOperand(Inst, DAsm->decodeVSrcOp(AMDGPUDisassembler::OPW32, Imm));
+}
+
 static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm,
                                           uint64_t Addr,
                                           const MCDisassembler *Decoder) {
   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
-  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
+  return addOperand(Inst, DAsm->decodeVSrcOp(AMDGPUDisassembler::OPW64, Imm));
 }
 
 static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
-  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
+  return addOperand(Inst, DAsm->decodeVSrcOp(AMDGPUDisassembler::OPW128, Imm));
 }
 
 static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm,
                                            uint64_t Addr,
                                            const MCDisassembler *Decoder) {
   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
-  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
+  return addOperand(Inst, DAsm->decodeVSrcOp(AMDGPUDisassembler::OPW256, Imm));
 }
 
-static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm,
-                                           uint64_t Addr,
-                                           const MCDisassembler *Decoder) {
-  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
-  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
+static DecodeStatus decodeOperand_VISrc_64_64(MCInst &Inst, unsigned Imm,
+                                              uint64_t Addr,
+                                              const MCDisassembler *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst,
+                    DAsm->decodeVISrcOp(AMDGPUDisassembler::OPW64, Imm, 64));
 }
 
-static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm,
+static DecodeStatus decodeOperand_VISrc_128(MCInst &Inst, unsigned Imm,
                                             uint64_t Addr,
                                             const MCDisassembler *Decoder) {
-  auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
-  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst,
+                    DAsm->decodeVISrcOp(AMDGPUDisassembler::OPW128, Imm, 32));
+}
+
+static DecodeStatus decodeOperand_VISrc_256(MCInst &Inst, unsigned Imm,
+                                            uint64_t Addr,
+                                            const MCDisassembler *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst,
+                    DAsm->decodeVISrcOp(AMDGPUDisassembler::OPW256, Imm, 32));
+}
+
+static DecodeStatus decodeOperand_VISrc_256_64(MCInst &Inst, unsigned Imm,
+                                               uint64_t Addr,
+                                               const MCDisassembler *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst,
+                    DAsm->decodeVISrcOp(AMDGPUDisassembler::OPW256, Imm, 64));
+}
+
+static DecodeStatus decodeOperand_VISrc_512(MCInst &Inst, unsigned Imm,
+                                            uint64_t Addr,
+                                            const MCDisassembler *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst,
+                    DAsm->decodeVISrcOp(AMDGPUDisassembler::OPW512, Imm, 32));
+}
+
+static DecodeStatus decodeOperand_VISrc_1024(MCInst &Inst, unsigned Imm,
+                                             uint64_t Addr,
+                                             const MCDisassembler *Decoder) {
+  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+  return addOperand(Inst,
+                    DAsm->decodeVISrcOp(AMDGPUDisassembler::OPW1024, Imm, 32));
 }
 
 static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
@@ -1264,6 +1304,10 @@
   return decodeSrcOp(OPW512, Val | IS_VGPR);
 }
 
+MCOperand AMDGPUDisassembler::decodeOperand_VReg_32(unsigned Val) const {
+  return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
+}
+
 MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
   return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
 }
@@ -1661,6 +1705,43 @@
   }
 }
 
+// Decode a source operand that may only be a VGPR tuple of \p Width.
+MCOperand AMDGPUDisassembler::decodeVSrcOp(const OpWidthTy Width,
+                                           unsigned Val) const {
+  using namespace AMDGPU::EncValues;
+  assert(Val < 1024); // enum10
+
+  // Anything outside the VGPR encoding range yields a default MCOperand.
+  if (Val < VGPR_MIN || Val > VGPR_MAX)
+    return MCOperand();
+  return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
+}
+
+// Decode a source that is either a VGPR tuple of \p Width or an inline
+// constant; \p TypeSize (32 or 64) selects the floating-point constant width.
+MCOperand AMDGPUDisassembler::decodeVISrcOp(const OpWidthTy Width, unsigned Val,
+                                            unsigned TypeSize) const {
+  using namespace AMDGPU::EncValues;
+  assert(Val < 1024); // enum10
+
+  if (Val >= VGPR_MIN && Val <= VGPR_MAX)
+    return createRegOperand(getVgprClassId(Width), Val - VGPR_MIN);
+
+  // Operand classes using this decoder are typed, but the asm parser only
+  // checks the bit width, not int vs. float: a floating-point instruction
+  // accepts an integer constant and vice versa.
+  if (Val >= INLINE_INTEGER_C_MIN && Val <= INLINE_INTEGER_C_MAX)
+    return decodeIntImmed(Val);
+
+  if (Val < INLINE_FLOATING_C_MIN || Val > INLINE_FLOATING_C_MAX)
+    return MCOperand();
+
+  assert(TypeSize == 32 || TypeSize == 64);
+  if (TypeSize == 32)
+    return MCOperand::createImm(getInlineImmVal32(Val));
+  return MCOperand::createImm(getInlineImmVal64(Val));
+}
+
 MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
   using namespace AMDGPU::EncValues;
 
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1244,7 +1244,7 @@
 // but only allows VGPRs.
 def VRegSrc_32 : RegisterOperand<VGPR_32> {
   //let ParserMatchClass = RegImmMatcher<"VRegSrc32">;
-  let DecoderMethod = "DecodeVS_32RegisterClass";
+  let DecoderMethod = "decodeOperand_VReg_32";
 }
 
 def VRegSrc_64 : RegisterOperand<VReg_64> {
@@ -1291,15 +1291,28 @@
 //  VISrc_* Operands with a VGPR or an inline constant
 //===----------------------------------------------------------------------===//
 
+let DecoderMethod = "decodeOperand_VISrc_32" in
 defm VISrc : RegInlineOperand32<"VGPR", "VISrc">;
-let DecoderMethod = "decodeOperand_VReg_64" in
-defm VISrc_64   : RegInlineOperand64<"VReg", "VISrc_64",   "_64">;
+let DecoderMethod = "decodeOperand_VISrc_128" in
 defm VISrc_128  : RegInlineOperandAC<"VReg", "VISrc_128",  "_128">;
-let DecoderMethod = "decodeOperand_VReg_256" in
-defm VISrc_256  : RegInlineOperand64<"VReg", "VISrc_256",  "_256">;
+
+def VISrc_256_f32 : RegisterOperand<!cast<RegisterClass>(VReg_256)> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_REG_INLINE_C_FP32";
+  let ParserMatchClass = RegImmMatcher<"VISrc_256F32">;
+  let DecoderMethod = "decodeOperand_VISrc_256";
+}
+
+let DecoderMethod = "decodeOperand_VISrc_512" in
 defm VISrc_512  : RegInlineOperandAC<"VReg", "VISrc_512",  "_512">;
+let DecoderMethod = "decodeOperand_VISrc_1024" in
 defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;
 
+let DecoderMethod = "decodeOperand_VISrc_64_64" in
+defm VISrc_64   : RegInlineOperand64<"VReg", "VISrc_64",   "_64">;
+let DecoderMethod = "decodeOperand_VISrc_256_64" in
+defm VISrc_256  : RegInlineOperand64<"VReg", "VISrc_256",  "_256">;
+
 //===----------------------------------------------------------------------===//
 //  AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
 //===----------------------------------------------------------------------===//
Index: llvm/lib/Target/AMDGPU/VOP3PInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -703,7 +703,7 @@
   let DstRC = !if(!eq(Suffix, "_w32"), VDst_256, VDst_128);
   let Src0RC64 = _Src01RC64;
   let Src1RC64 = _Src01RC64;
-  let Src2RC64 = !if(!eq(Suffix, "_w32"), VISrc_256_f64, VISrc_128_f32);
+  let Src2RC64 = !if(!eq(Suffix, "_w32"), VISrc_256_f32, VISrc_128_f32);
   let HasClamp = _HasClamp;
   let HasOpSel = _HasOpSel;
   let IsPacked = 1;
Index: llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
===================================================================
--- llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
+++ llvm/test/MC/Disassembler/AMDGPU/decode-err.txt
@@ -1,34 +1,39 @@
 # RUN: not llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefix=GCN %s
-# RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=GFX11,W32 %s
-# RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=GFX11,W64 %s
+# RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefix=W32 %s
+# RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=W32-ERR %s
+# RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=W64 %s
+# RUN: not llvm-mc -arch=amdgcn -mcpu=gfx1100 -mattr=-WavefrontSize32,+WavefrontSize64 -disassemble -show-encoding %s 2>&1 | FileCheck --implicit-check-not=warning: --check-prefix=W64-ERR %s
 
 # GCN: warning: invalid instruction encoding
+# W32-ERR: warning: invalid instruction encoding
+# W64-ERR: warning: invalid instruction encoding
 [0xdf,0x00,0x00,0x02]
 
 # this is buffer_atomic_csub_u32 v5, off, s[8:11], s3 offset:4095. Invalid without glc
-# GFX11: warning: invalid instruction encoding
+# W32-ERR: warning: invalid instruction encoding
+# W64-ERR: warning: invalid instruction encoding
 [0xff,0x0f,0xdc,0xe0,0x00,0x05,0x02,0x03]
 
 # W32: v_dual_add_f32 v5, 0xaf123456, v2 :: v_dual_fmaak_f32 v6, v3, v1, 0xaf123456 ; encoding: [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
-# W64: warning: invalid instruction encoding
+# W64-ERR: warning: invalid instruction encoding
 [0xff,0x04,0x02,0xc9,0x03,0x03,0x06,0x05,0x56,0x34,0x12,0xaf]
 
 # W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
 # W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
 [0x10,0x40,0x40,0xcc,0x00,0x11,0x42,0x1c]
 
-# W32: v_wmma_f32_16x16x16_f16 v[16:23], /*invalid immediate*/, v[8:15], v[16:23] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x10,0x42,0x1c]
-# W64: v_wmma_f32_16x16x16_f16 v[16:19], /*invalid immediate*/, v[8:15], v[16:19] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x10,0x42,0x1c]
+# W32-ERR: warning: invalid instruction encoding
+# W64-ERR: warning: invalid instruction encoding
 [0x10,0x40,0x40,0xcc,0xf2,0x10,0x42,0x1c] # src0 1.0
 
-# W32: v_wmma_f32_16x16x16_f16 v[16:23], s[0:7], v[8:15], v[16:23] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x10,0x42,0x1c]
-# W64: v_wmma_f32_16x16x16_f16 v[16:19], s[0:7], v[8:15], v[16:19] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x10,0x42,0x1c]
+# W32-ERR: warning: invalid instruction encoding
+# W64-ERR: warning: invalid instruction encoding
 [0x10,0x40,0x40,0xcc,0x00,0x10,0x42,0x1c] # src0 sgpr0
 
 # W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0xca,0x1b]
 # W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], 1.0 ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0xca,0x1b]
 [0x10,0x40,0x40,0xcc,0x00,0x11,0xca,0x1b] # src2 1.0
 
-# W32: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], s[0:7] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x02,0x18]
-# W64: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], s[0:3] ; encoding: [0x10,0x40,0x40,0xcc,0x00,0x11,0x02,0x18]
+# W32-ERR: warning: invalid instruction encoding
+# W64-ERR: warning: invalid instruction encoding
 [0x10,0x40,0x40,0xcc,0x00,0x11,0x02,0x18] # src2 sgpr0