diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1546,6 +1546,12 @@
   Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
   AssemblerPredicate<(all_of FeatureGFX10Insts)>;
 
+// Selection predicate that queries Subtarget->hasGFX10Insts() directly rather
+// than comparing the subtarget generation as the predicate above does.
+def HasGFX10Insts :
+  Predicate<"Subtarget->hasGFX10Insts()">,
+  AssemblerPredicate<(all_of FeatureGFX10Insts)>;
+
 def isGFX10Before1030 :
   Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX10 &&"
             "!Subtarget->hasGFX10_3Insts()">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -554,7 +554,9 @@
       .scalarize(0)
       .widenScalarToNextPow2(0, 32)
       .lower();
-  } else if (ST.has16BitInsts()) {
+  } else if (ST.has16BitInsts() && ST.hasMad64_32()) {
+    // hasMad64_32() is part of the condition because the G_MUL rule registered
+    // in this branch was previously guarded by an assert on it.
     getActionDefinitionsBuilder({G_ADD, G_SUB})
       .legalFor({S32, S16})
       .minScalar(0, S16)
@@ -563,12 +565,11 @@
       .scalarize(0);
 
     getActionDefinitionsBuilder(G_MUL)
       .legalFor({S32, S16})
       .scalarize(0)
       .minScalar(0, S16)
       .widenScalarToNextMultipleOf(0, 32)
       .custom();
-    assert(ST.hasMad64_32());
 
     // Technically the saturating operations require clamp bit support, but this
     // was introduced at the same time as 16-bit operations.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -182,6 +182,13 @@
 }
 
 unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
+  // The _e64 permlane16/permlanex16 opcodes get a limit of 2 whenever
+  // GFX10Insts is set. This is tested before the generation check below, which
+  // would otherwise return 1 for any subtarget whose generation is below GFX10.
+  if (hasGFX10Insts() && (Opcode == AMDGPU::V_PERMLANE16_B32_e64 ||
+                          Opcode == AMDGPU::V_PERMLANEX16_B32_e64))
+    return 2;
+
   if (getGeneration() < GFX10)
     return 1;
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -849,11 +849,14 @@
   bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
 
   /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
-  bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
+  bool hasPermLaneX16() const { return hasGFX10Insts(); }
 
   /// \returns true if the subtarget has the v_permlane64_b32 instruction.
   bool hasPermLane64() const { return getGeneration() >= GFX11; }
 
+  /// \returns the GFX10Insts flag; getGeneration() is not consulted.
+  bool hasGFX10Insts() const { return GFX10Insts; }
+
   bool hasGFX11Insts() const { return GFX11Insts; }
 
   bool hasDPP() const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7998,6 +7998,14 @@
       MCOp = NMCOp;
   }
 
+  // With GFX10Insts, switch to the GFX10-family opcode when the lookup yields
+  // one, i.e. anything other than (uint16_t)-1.
+  if (ST.hasGFX10Insts()) {
+    uint16_t NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX10);
+    if (NMCOp != (uint16_t)-1)
+      MCOp = NMCOp;
+  }
+
   if (ST.hasGFX11Insts()) {
     uint16_t NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX11);
     if (NMCOp != (uint16_t)-1)
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -706,7 +706,9 @@
 >;
 
 
-let SubtargetPredicate = isGFX10Plus in {
+// Everything in this block is predicated on HasGFX10Insts and Has16BitInsts.
+// NOTE(review): confirm V_XOR3_B32 is meant to require Has16BitInsts.
+let Predicates = [HasGFX10Insts, Has16BitInsts] in {
   let isCommutable = 1, isReMaterializable = 1 in {
     defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
   } // End isCommutable = 1, isReMaterializable = 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
@@ -1,3 +1,4 @@
+; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mattr=+gfx10-insts,+16-bit-insts -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
 ; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
 ; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s