Index: clang/include/clang/Basic/BuiltinsHexagonDep.def
===================================================================
--- clang/include/clang/Basic/BuiltinsHexagonDep.def
+++ clang/include/clang/Basic/BuiltinsHexagonDep.def
@@ -1890,3 +1890,36 @@
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhsat_128B, "V32iV64iV32i", "", HVXV69)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs, "V16iV16iV16i", "", HVXV69)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs_128B, "V32iV32iV32i", "", HVXV69)
+
+// V73 HVX Instructions.
+
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_bf, "V32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_bf_128B, "V64iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_h_hf, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_h_hf_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_h, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_h_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_w, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_w_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_w_sf, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_w_sf_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_bf_sf, "V16iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_bf_sf_128B, "V32iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf, "V64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_128B, "V128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_and, "V64bV64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_and_128B, "V128bV128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_or, "V64bV64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_or_128B, "V128bV128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_xor, "V64bV64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_xor_128B, "V128bV128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_bf, "V16iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_bf_128B, "V32iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_bf, "V16iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_bf_128B, "V32iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf, "V32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf_128B, "V64iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf_acc, "V32iV32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf_acc_128B, "V64iV64iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_bf, "V32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_bf_128B, "V64iV32iV32i", "", HVXV73)
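For context, a minimal C sketch of how one of the new V73 builtins might be invoked. This is not part of the patch: it assumes a compiler carrying this change plus driver flags along the lines of -mv73 -mhvx -mhvx-length=64B, and the typedefs and function name are illustrative. In the prototype strings above, "V16i" denotes one 64-byte HVX vector (16 x i32) and "V32i" a vector pair:

  /* Hypothetical usage sketch, not from the patch. */
  typedef int HVX_Vector __attribute__((__vector_size__(64)));      /* "V16i" */
  typedef int HVX_VectorPair __attribute__((__vector_size__(128))); /* "V32i" */

  /* V6_vadd_sf_bf: add two bf16 vectors, widening into a float vector pair. */
  HVX_VectorPair add_bf16(HVX_Vector a, HVX_Vector b) {
    return __builtin_HEXAGON_V6_vadd_sf_bf(a, b);
  }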
Index: lldb/include/lldb/Core/Module.h
===================================================================
--- lldb/include/lldb/Core/Module.h
+++ lldb/include/lldb/Core/Module.h
@@ -815,8 +815,6 @@
   llvm::Expected<lldb::TypeSystemSP>
   GetTypeSystemForLanguage(lldb::LanguageType language);
 
-  /// Call \p callback for each \p TypeSystem in this \p Module.
-  /// Return true from callback to keep iterating, false to stop iterating.
   void ForEachTypeSystem(llvm::function_ref<bool(lldb::TypeSystemSP)> callback);
 
   // Special error functions that can do printf style formatting that will
Index: llvm/include/llvm/IR/IntrinsicsHexagonDep.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsHexagonDep.td
+++ llvm/include/llvm/IR/IntrinsicsHexagonDep.td
@@ -316,7 +316,7 @@
        [llvm_v32i32_ty], [llvm_v64i32_ty],
        intr_properties>;
 
-// tag : V6_lvsplatw
+// tag : V6_lvsplatb
 class Hexagon_v16i32_i32_Intrinsic<string GCCIntSuffix,
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_Intrinsic<GCCIntSuffix,
       [llvm_v16i32_ty], [llvm_i32_ty],
       intr_properties>;
 
-// tag : V6_vadd_sf_hf
+// tag : V6_vadd_sf_bf
 class Hexagon_v32i32_v16i32v16i32_Intrinsic<string GCCIntSuffix,
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_Intrinsic<GCCIntSuffix,
       [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
       intr_properties>;
 
-// tag : V6_vadd_sf_hf
+// tag : V6_vadd_sf_bf
 class Hexagon_v64i32_v32i32v32i32_Intrinsic<string GCCIntSuffix,
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_Intrinsic<GCCIntSuffix,
       [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
       intr_properties>;
 
+// V73 HVX Instructions.
+
+def int_hexagon_V6_vadd_sf_bf :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf_bf">;
+
+def int_hexagon_V6_vadd_sf_bf_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_bf_128B">;
+
+def int_hexagon_V6_vconv_h_hf :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_h_hf">;
+
+def int_hexagon_V6_vconv_h_hf_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_h_hf_128B">;
+
+def int_hexagon_V6_vconv_hf_h :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_hf_h">;
+
+def int_hexagon_V6_vconv_hf_h_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_hf_h_128B">;
+
+def int_hexagon_V6_vconv_sf_w :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_sf_w">;
+
+def int_hexagon_V6_vconv_sf_w_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_sf_w_128B">;
+
+def int_hexagon_V6_vconv_w_sf :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_w_sf">;
+
+def int_hexagon_V6_vconv_w_sf_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_w_sf_128B">;
+
+def int_hexagon_V6_vcvt_bf_sf :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_bf_sf">;
+
+def int_hexagon_V6_vcvt_bf_sf_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_bf_sf_128B">;
+
+def int_hexagon_V6_vgtbf :
+Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf">;
+
+def int_hexagon_V6_vgtbf_128B :
+Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_128B">;
+
+def int_hexagon_V6_vgtbf_and :
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf_and">;
+
+def int_hexagon_V6_vgtbf_and_128B :
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_and_128B">;
+
+def int_hexagon_V6_vgtbf_or :
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf_or">;
+
+def int_hexagon_V6_vgtbf_or_128B :
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_or_128B">;
+
+def int_hexagon_V6_vgtbf_xor :
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf_xor">;
+
+def int_hexagon_V6_vgtbf_xor_128B :
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_xor_128B">;
+
+def int_hexagon_V6_vmax_bf :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmax_bf">;
+
+def int_hexagon_V6_vmax_bf_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmax_bf_128B">;
+
+def int_hexagon_V6_vmin_bf :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmin_bf">;
+
+def int_hexagon_V6_vmin_bf_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmin_bf_128B">;
+
+def int_hexagon_V6_vmpy_sf_bf :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf">;
+
+def int_hexagon_V6_vmpy_sf_bf_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf_128B">;
+
+def int_hexagon_V6_vmpy_sf_bf_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf_acc">;
+
+def int_hexagon_V6_vmpy_sf_bf_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf_acc_128B">;
+
+def int_hexagon_V6_vsub_sf_bf :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf_bf">;
+
+def int_hexagon_V6_vsub_sf_bf_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_bf_128B">;
+
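Each intrinsic above mirrors one of the new builtins and encodes its LLVM type signature in the class name; for example, Hexagon_v32i32_v16i32v16i32_Intrinsic returns a v32i32 pair and takes two v16i32 vectors. A hedged C sketch of the _128B forms, assuming -mhvx-length=128B, where a single vector is 128 bytes, so "V32i" is one vector and "V64i" a pair (typedefs again illustrative):

  /* Hypothetical usage sketch of an accumulating _128B form. */
  typedef int HVX128_Vector __attribute__((__vector_size__(128)));
  typedef int HVX128_VectorPair __attribute__((__vector_size__(256)));

  /* V6_vmpy_sf_bf_acc: acc (float pair) += a (bf16) * b (bf16). */
  HVX128_VectorPair mac_bf16(HVX128_VectorPair acc,
                             HVX128_Vector a, HVX128_Vector b) {
    return __builtin_HEXAGON_V6_vmpy_sf_bf_acc_128B(acc, a, b);
  }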
Index: llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
===================================================================
--- llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -354,6 +354,11 @@
       return false;
     return Value == -1;
   }
+  bool issgp10Const() const {
+    if (!isReg())
+      return false;
+    return getReg() == Hexagon::SGP1_0;
+  }
   bool iss11_0Imm() const {
     return CheckImmRange(11 + 26, 0, true, true, true);
   }
@@ -400,6 +405,9 @@
   void addn1ConstOperands(MCInst &Inst, unsigned N) const {
     addImmOperands(Inst, N);
   }
+  void addsgp10ConstOperands(MCInst &Inst, unsigned N) const {
+    addRegOperands(Inst, N);
+  }
 
   StringRef getToken() const {
     assert(Kind == Token && "Invalid access!");
Index: llvm/lib/Target/Hexagon/Hexagon.td
===================================================================
--- llvm/lib/Target/Hexagon/Hexagon.td
+++ llvm/lib/Target/Hexagon/Hexagon.td
@@ -58,6 +58,14 @@
     "Hexagon::ArchEnum::V69", "Hexagon HVX instructions", [ExtensionHVXV60,
     ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67,
     ExtensionHVXV68]>;
+def ExtensionHVXV71: SubtargetFeature<"hvxv71", "HexagonHVXVersion",
+    "Hexagon::ArchEnum::V71", "Hexagon HVX instructions",
+    [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
+    ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69]>;
+def ExtensionHVXV73: SubtargetFeature<"hvxv73", "HexagonHVXVersion",
+    "Hexagon::ArchEnum::V73", "Hexagon HVX instructions",
+    [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
+    ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71]>;
 def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
     "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
 
@@ -125,6 +133,10 @@
       AssemblerPredicate<(all_of ExtensionHVXV68)>;
 def UseHVXV69 : Predicate<"HST->useHVXV69Ops()">,
       AssemblerPredicate<(all_of ExtensionHVXV69)>;
+def UseHVXV71 : Predicate<"HST->useHVXV71Ops()">,
+      AssemblerPredicate<(all_of ExtensionHVXV71)>;
+def UseHVXV73 : Predicate<"HST->useHVXV73Ops()">,
+      AssemblerPredicate<(all_of ExtensionHVXV73)>;
 def UseAudio : Predicate<"HST->useAudioOps()">,
       AssemblerPredicate<(all_of ExtensionAudio)>;
 def UseZReg : Predicate<"HST->useZRegOps()">,
@@ -439,6 +451,17 @@
            FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
            FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
            FeatureCabac]>;
+def : Proc<"hexagonv71", HexagonModelV71,
+           [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+            ArchV68, ArchV69, ArchV71,
+            FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+            FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+            FeatureCabac]>;
+def : Proc<"hexagonv73", HexagonModelV73,
+           [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+            ArchV68, ArchV69, ArchV71, ArchV73,
+            FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+            FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
 
 // Need to update the correct features for tiny core.
 // Disable NewValueJumps since the packetizer is unable to handle a packet with
 // a new value jump and another SLOT0 instruction.
@@ -448,6 +471,13 @@
            FeatureCompound, FeatureMemNoShuf, FeatureMemops,
            FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv71t", HexagonModelV71T,
+           [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+            ArchV68, ArchV69, ArchV71,
+            ProcTinyCore, ExtensionAudio,
+            FeatureCompound, FeatureMemNoShuf, FeatureMemops,
+            FeatureNVS, FeaturePackets, FeatureSmallData]>;
+
 //===----------------------------------------------------------------------===//
 // Declare the target which we are implementing
 //===----------------------------------------------------------------------===//
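The new CPUs are selected the same way as the existing ones (for example -mcpu=hexagonv73 for llc), and HVXV73 pulls in every earlier HVX feature set through the dependency lists above. A small, hedged C sketch of guarding bf16 code on the usual Hexagon predefines; the __HVX_ARCH__ spelling follows the existing convention and is an assumption here, not something this hunk adds:

  /* Hypothetical feature guard; macro names assumed, not from the patch. */
  #if defined(__HVX__) && defined(__HVX_ARCH__) && (__HVX_ARCH__ >= 73)
  #  define HAVE_HVX_BF16 1  /* vadd_sf_bf, vmpy_sf_bf, vgtbf, ... selectable */
  #else
  #  define HAVE_HVX_BF16 0
  #endif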
Index: llvm/lib/Target/Hexagon/HexagonDepArch.h
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -5,9 +5,6 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// Automatically generated file, do not edit!
-//===----------------------------------------------------------------------===//
-
 #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
 #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
 
@@ -16,7 +13,21 @@
 namespace llvm {
 namespace Hexagon {
-enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 };
+enum class ArchEnum {
+  NoArch,
+  Generic,
+  V5,
+  V55,
+  V60,
+  V62,
+  V65,
+  V66,
+  V67,
+  V68,
+  V69,
+  V71,
+  V73
+};
 
 inline Optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
   return StringSwitch<Optional<Hexagon::ArchEnum>>(CPU)
@@ -31,6 +42,9 @@
       .Case("hexagonv67t", Hexagon::ArchEnum::V67)
       .Case("hexagonv68", Hexagon::ArchEnum::V68)
       .Case("hexagonv69", Hexagon::ArchEnum::V69)
+      .Case("hexagonv71", Hexagon::ArchEnum::V71)
+      .Case("hexagonv71t", Hexagon::ArchEnum::V71)
+      .Case("hexagonv73", Hexagon::ArchEnum::V73)
       .Default(None);
 }
 } // namespace Hexagon
Index: llvm/lib/Target/Hexagon/HexagonDepArch.td
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -26,3 +26,7 @@
 def HasV68 : Predicate<"HST->hasV68Ops()">, AssemblerPredicate<(all_of ArchV68)>;
 def ArchV69: SubtargetFeature<"v69", "HexagonArchVersion", "Hexagon::ArchEnum::V69", "Enable Hexagon V69 architecture">;
 def HasV69 : Predicate<"HST->hasV69Ops()">, AssemblerPredicate<(all_of ArchV69)>;
+def ArchV71: SubtargetFeature<"v71", "HexagonArchVersion", "Hexagon::ArchEnum::V71", "Enable Hexagon V71 architecture">;
+def HasV71 : Predicate<"HST->hasV71Ops()">, AssemblerPredicate<(all_of ArchV71)>;
+def ArchV73: SubtargetFeature<"v73", "HexagonArchVersion", "Hexagon::ArchEnum::V73", "Enable Hexagon V73 architecture">;
+def HasV73 : Predicate<"HST->hasV73Ops()">, AssemblerPredicate<(all_of ArchV73)>;
Index: llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -8,6 +8,7 @@
 // Automatically generated file, do not edit!
//===----------------------------------------------------------------------===// +def tc_0390c1ca : InstrItinClass; def tc_04da405a : InstrItinClass; def tc_05ca8cfd : InstrItinClass; def tc_08a4f1b6 : InstrItinClass; @@ -22,7 +23,7 @@ def tc_1ad8a370 : InstrItinClass; def tc_1ba8a0cd : InstrItinClass; def tc_20a4bbec : InstrItinClass; -def tc_2120355e : InstrItinClass; +def tc_227864f7 : InstrItinClass; def tc_257f6f7c : InstrItinClass; def tc_26a377fe : InstrItinClass; def tc_2b4c548e : InstrItinClass; @@ -44,6 +45,7 @@ def tc_4942646a : InstrItinClass; def tc_51d0ecc3 : InstrItinClass; def tc_52447ecc : InstrItinClass; +def tc_531b383c : InstrItinClass; def tc_540c3da3 : InstrItinClass; def tc_54a0dc47 : InstrItinClass; def tc_561aaa58 : InstrItinClass; @@ -75,6 +77,8 @@ def tc_90bcc1db : InstrItinClass; def tc_933f2b39 : InstrItinClass; def tc_946013d8 : InstrItinClass; +def tc_9a1cab75 : InstrItinClass; +def tc_9aff7a2a : InstrItinClass; def tc_9d1dc972 : InstrItinClass; def tc_9f363d21 : InstrItinClass; def tc_a02a10a8 : InstrItinClass; @@ -83,7 +87,6 @@ def tc_a28f32b5 : InstrItinClass; def tc_a69eeee1 : InstrItinClass; def tc_a7e6707d : InstrItinClass; -def tc_aa047364 : InstrItinClass; def tc_ab23f776 : InstrItinClass; def tc_abe8c3b2 : InstrItinClass; def tc_ac4046bc : InstrItinClass; @@ -106,6 +109,7 @@ def tc_dd5b0695 : InstrItinClass; def tc_df80eeb0 : InstrItinClass; def tc_e2d2e9e5 : InstrItinClass; +def tc_e2fdd6e6 : InstrItinClass; def tc_e35c1e93 : InstrItinClass; def tc_e3f68a46 : InstrItinClass; def tc_e675c45a : InstrItinClass; @@ -117,6 +121,13 @@ class DepHVXItinV55 { list DepHVXItinV55_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -192,9 +203,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -306,6 +320,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -467,6 +485,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -513,10 +545,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -635,6 +663,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -681,6 +713,13 @@ class DepHVXItinV60 { list DepHVXItinV60_list = [ + InstrItinData , + 
InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -756,9 +795,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -870,6 +912,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -1031,6 +1077,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -1077,10 +1137,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1199,6 +1255,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -1245,6 +1305,13 @@ class DepHVXItinV62 { list DepHVXItinV62_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -1320,9 +1387,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -1434,6 +1504,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -1595,6 +1669,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -1641,10 +1729,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1763,6 +1847,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, 
+ InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -1809,6 +1897,13 @@ class DepHVXItinV65 { list DepHVXItinV65_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -1884,9 +1979,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -1998,6 +2096,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -2159,6 +2261,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -2205,10 +2321,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2327,6 +2439,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -2373,6 +2489,13 @@ class DepHVXItinV66 { list DepHVXItinV66_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -2448,9 +2571,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -2562,6 +2688,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -2723,6 +2853,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -2769,10 +2913,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, 
Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2891,6 +3031,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -2937,6 +3081,13 @@ class DepHVXItinV67 { list DepHVXItinV67_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -3012,9 +3163,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -3126,6 +3280,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -3287,6 +3445,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -3333,10 +3505,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -3455,6 +3623,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -3501,6 +3673,13 @@ class DepHVXItinV68 { list DepHVXItinV68_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -3576,9 +3755,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -3690,6 +3872,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -3851,6 +4037,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, 
CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -3897,10 +4097,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -4019,6 +4215,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -4065,6 +4265,13 @@ class DepHVXItinV69 { list DepHVXItinV69_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -4140,9 +4347,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -4254,6 +4464,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -4415,6 +4629,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -4461,10 +4689,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -4583,6 +4807,1194 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV71 { + list DepHVXItinV71_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, 
Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + 
+ InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData 
, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + 
[HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData ], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV73 { + list DepHVXItinV73_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], 
[9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, 
CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + 
InstrItinData , + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData ], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], Index: llvm/lib/Target/Hexagon/HexagonDepIICScalar.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -38,11 +38,13 @@ def tc_2471c1c8 : InstrItinClass; def tc_24e109c7 : InstrItinClass; def tc_24f426ab : InstrItinClass; +def tc_27106296 : InstrItinClass; def tc_280f7fe1 : InstrItinClass; def tc_28e55c6f : InstrItinClass; def tc_2c13e7f5 : InstrItinClass; def tc_2c3e17fc : InstrItinClass; def tc_2f573607 : InstrItinClass; +def tc_33e7e673 : InstrItinClass; def tc_362b0be2 : InstrItinClass; def tc_38382228 : InstrItinClass; def tc_388f9897 : InstrItinClass; @@ -107,6 +109,7 @@ def tc_7b9187d3 : InstrItinClass; def tc_7c31e19a : InstrItinClass; def tc_7c6d32e4 : InstrItinClass; +def tc_7dc63b5c : InstrItinClass; def tc_7f7f45f5 : InstrItinClass; def tc_7f8ae742 : InstrItinClass; def tc_8035e91f : InstrItinClass; @@ -120,12 +123,14 @@ def tc_8a825db2 : InstrItinClass; def tc_8b5bd4f5 : InstrItinClass; def tc_8e82e8ca : InstrItinClass; +def tc_8f36a2fd : InstrItinClass; def tc_9124c04f : InstrItinClass; def tc_92240447 : InstrItinClass; def tc_934753bb : InstrItinClass; def tc_937dd41c : InstrItinClass; def tc_9406230a : InstrItinClass; def tc_95a33176 : InstrItinClass; +def tc_95f43c5e : InstrItinClass; def tc_96ef76ef : InstrItinClass; def tc_975a4e54 : InstrItinClass; def tc_9783714b : InstrItinClass; @@ -155,6 +160,7 @@ def tc_addc37a8 : InstrItinClass; def tc_ae5babd7 : InstrItinClass; def tc_aee6250c : InstrItinClass; +def tc_af6af259 : InstrItinClass; def tc_b1ae5f67 : InstrItinClass; def tc_b4dc7630 : InstrItinClass; def tc_b7c4062a : InstrItinClass; @@ -183,6 +189,7 @@ def tc_decdde8a : InstrItinClass; def tc_df5d53f9 : InstrItinClass; def tc_e3d699e3 : InstrItinClass; +def tc_e60def48 : InstrItinClass; def tc_e9170fb7 : InstrItinClass; def tc_ed03645c : InstrItinClass; def tc_eed07714 : InstrItinClass; @@ -196,6 +203,7 @@ def tc_f529831b : InstrItinClass; def tc_f6e2aff9 : InstrItinClass; def tc_f7569068 : InstrItinClass; +def tc_f97707c1 : InstrItinClass; def tc_f999c66e : InstrItinClass; def tc_fae9dfa5 : InstrItinClass; def tc_fedb7e19 : InstrItinClass; @@ -232,11 +240,13 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -301,6 +311,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -314,12 +325,14 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -349,6 +362,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, 
InstrItinData ]>, @@ -377,6 +391,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -390,6 +405,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]> ]; @@ -517,6 +533,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -537,6 +557,10 @@ [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -793,6 +817,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 4, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -845,6 +873,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -869,6 +901,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -985,6 +1021,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -1097,6 +1137,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -1149,6 +1193,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -1285,6 +1333,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1305,6 +1357,10 @@ [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -1561,6 +1617,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1613,6 +1673,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -1637,6 +1701,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1753,6 +1821,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -1865,6 +1937,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -1917,6 +1993,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -2058,6 +2138,10 @@ InstrStage<1, [CVI_ST]>], [1, 2, 2], 
[Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2079,6 +2163,11 @@ InstrStage<1, [CVI_ST]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [], + []>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1], @@ -2348,6 +2437,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2400,6 +2493,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -2424,6 +2521,11 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2543,6 +2645,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -2660,6 +2766,11 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -2716,6 +2827,11 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [2], + [Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -2853,6 +2969,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2873,6 +2993,10 @@ [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -3129,6 +3253,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3181,6 +3309,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -3205,6 +3337,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3321,6 +3457,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -3433,6 +3573,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -3485,6 +3629,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -3621,6 +3769,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3641,6 +3793,10 @@ [InstrStage<1, 
[SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -3897,6 +4053,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3949,6 +4109,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -3973,6 +4137,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4089,6 +4257,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -4201,6 +4373,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -4253,6 +4429,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -4389,6 +4569,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4409,6 +4593,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -4665,6 +4853,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4717,6 +4909,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -4741,6 +4937,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4857,6 +5057,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -4969,6 +5173,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -5021,6 +5229,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -5157,6 +5369,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5177,6 +5393,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -5433,6 +5653,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5485,6 +5709,10 @@ 
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -5509,6 +5737,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5625,6 +5857,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -5737,6 +5973,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -5789,6 +6029,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -5925,6 +6169,10 @@ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5945,6 +6193,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -6201,6 +6453,10 @@ [InstrStage<1, [SLOT0]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -6253,6 +6509,10 @@ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -6277,6 +6537,10 @@ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -6393,6 +6657,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -6505,6 +6773,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -6557,6 +6829,10 @@ [InstrStage<1, [SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -6693,6 +6969,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -6713,6 +6993,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -6969,6 +7253,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7021,6 +7309,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -7045,6 +7337,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + 
InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7161,6 +7457,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -7273,6 +7573,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -7325,6 +7629,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -7461,6 +7769,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7481,6 +7793,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -7737,6 +8053,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7789,6 +8109,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -7813,6 +8137,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7929,6 +8257,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -8041,6 +8373,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -8093,6 +8429,2410 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV71 { + list DepScalarItinV71_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, 
Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV71T { + list DepScalarItinV71T_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 
2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV73 { + list DepScalarItinV73_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, 
+ + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], 
[4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, Index: llvm/lib/Target/Hexagon/HexagonDepITypes.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepITypes.h +++ llvm/lib/Target/Hexagon/HexagonDepITypes.h @@ -8,7 +8,6 @@ // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// - #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPITYPES_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPITYPES_H Index: llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td +++ llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td @@ -44,6 +44,14 @@ let Inst{13-13} = n1{1-1}; let Inst{8-8} = n1{0-0}; } +class Enc_046afa : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vss32; + let Inst{4-0} = Vss32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_04c959 : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; @@ -898,6 +906,10 @@ bits <5> Vxx32; let Inst{4-0} = Vxx32{4-0}; } +class Enc_403871 : OpcodeHexagon { + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_405228 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -1341,6 +1353,14 @@ bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } +class Enc_5eb169 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_607661 : OpcodeHexagon { bits <6> Ii; let Inst{12-7} = Ii{5-0}; @@ -1394,6 +1414,15 @@ bits <5> Rt32; let Inst{4-0} = Rt32{4-0}; } +class Enc_634460 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; +} class Enc_63eaeb : OpcodeHexagon { bits <2> Ii; let Inst{1-0} = Ii{1-0}; @@ -1771,6 +1800,14 @@ let Inst{25-22} = n1{4-1}; let Inst{13-13} = n1{0-0}; } +class Enc_80296d : OpcodeHexagon { + bits <5> Rs32; + let Inst{12-8} = Rs32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} class Enc_802dc0 : OpcodeHexagon { bits <1> Ii; let Inst{8-8} = Ii{0-0}; @@ -1791,6 +1828,14 @@ bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; } +class Enc_829a68 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_830e5d : OpcodeHexagon { bits <8> Ii; let Inst{12-5} = Ii{7-0}; @@ -2481,6 +2526,14 @@ bits <5> Rtt32; let Inst{12-8} = Rtt32{4-0}; } +class Enc_b025d6 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <5> Vss32; + let Inst{4-0} = Vss32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_b05839 : OpcodeHexagon { bits <7> Ii; let Inst{8-5} = Ii{6-3}; @@ -2672,6 +2725,15 @@ bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_b98b95 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vss32; + let Inst{4-0} = Vss32{4-0}; +} class Enc_b9c5fb : OpcodeHexagon { bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; @@ -2734,6 +2796,12 @@ bits <5> 
Rdd32;
  let Inst{4-0} = Rdd32{4-0};
}
+class Enc_bea5da : OpcodeHexagon {
+  bits <10> Ii;
+  let Inst{17-16} = Ii{9-8};
+  let Inst{12-8} = Ii{7-3};
+  let Inst{4-2} = Ii{2-0};
+}
class Enc_bfbf03 : OpcodeHexagon {
  bits <2> Qs4;
  let Inst{9-8} = Qs4{1-0};
@@ -2829,6 +2897,14 @@
  bits <5> Rd32;
  let Inst{4-0} = Rd32{4-0};
}
+class Enc_c89067 : OpcodeHexagon {
+  bits <5> Rtt32;
+  let Inst{20-16} = Rtt32{4-0};
+  bits <5> Rdd32;
+  let Inst{4-0} = Rdd32{4-0};
+  bits <5> Rx32;
+  let Inst{12-8} = Rx32{4-0};
+}
class Enc_c90aca : OpcodeHexagon {
  bits <8> Ii;
  let Inst{12-5} = Ii{7-0};
@@ -2954,6 +3030,11 @@
  bits <5> Rx32;
  let Inst{20-16} = Rx32{4-0};
}
+class Enc_d0fe02 : OpcodeHexagon {
+  bits <5> Rxx32;
+  let Inst{20-16} = Rxx32{4-0};
+  bits <0> sgp10;
+}
class Enc_d15d19 : OpcodeHexagon {
  bits <1> Mu2;
  let Inst{13-13} = Mu2{0-0};
@@ -3538,6 +3619,14 @@
  bits <5> Rd32;
  let Inst{4-0} = Rd32{4-0};
}
+class Enc_fc4562 : OpcodeHexagon {
+  bits <5> Rs32;
+  let Inst{12-8} = Rs32{4-0};
+  bits <5> Rtt32;
+  let Inst{20-16} = Rtt32{4-0};
+  bits <5> Rdd32;
+  let Inst{4-0} = Rdd32{4-0};
+}
class Enc_fcf7a7 : OpcodeHexagon {
  bits <5> Rss32;
  let Inst{20-16} = Rss32{4-0};
Index: llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -4959,6 +4959,18 @@
let hasSideEffects = 1;
let isTaken = Inst{12};
}
+def J2_callrh : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"callrh $Rs32",
+tc_95f43c5e, TypeJ>, Enc_ecbcc8, Requires<[HasV73]> {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010000110;
+let isCall = 1;
+let prefersSlot3 = 1;
+let cofMax1 = 1;
+let Defs = [PC, R31];
+}
def J2_callrt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
@@ -5308,6 +5320,19 @@
let Defs = [PC];
let isTaken = Inst{12};
}
+def J2_jumprh : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"jumprh $Rs32",
+tc_f97707c1, TypeJ>, Enc_ecbcc8, Requires<[HasV73]> {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010010110;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+}
def J2_jumprltez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
@@ -5680,13 +5705,13 @@
}
def J2_pause : HInst<
(outs),
-(ins u8_0Imm:$Ii),
+(ins u10_0Imm:$Ii),
"pause(#$Ii)",
-tc_d57d649c, TypeJ>, Enc_a51a9a {
+tc_d57d649c, TypeJ>, Enc_bea5da {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
-let Inst{31-16} = 0b0101010001000000;
+let Inst{31-18} = 0b01010100010000;
let isSolo = 1;
}
def J2_ploop1si : HInst<
@@ -5838,6 +5863,15 @@
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def J2_unpause : HInst<
+(outs),
+(ins),
+"unpause",
+tc_33e7e673, TypeJ>, Enc_e3b0c4, Requires<[HasV73]> {
+let Inst{13-0} = 0b01000000000000;
+let Inst{31-16} = 0b0101011111100000;
+let isSolo = 1;
+}
def J4_cmpeq_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
@@ -8494,7 +8528,7 @@
(outs),
(ins IntRegs:$Rs32),
"hintjr($Rs32)",
-tc_60e324ff, TypeJ>, Enc_ecbcc8 {
+tc_e60def48, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010101;
let isTerminator = 1;
@@ -13775,6 +13809,18 @@
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def L6_linecpy : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rdd32 = linecpy($Rs32,$Rtt32)",
+tc_8f36a2fd, TypeLD>, Enc_fc4562, Requires<[HasV73]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10011001111;
+let mayLoad = 1;
+let isSolo = 1;
+let mayStore = 1;
+}
def L6_memcpy : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32, ModRegs:$Mu2),
@@ -13786,6 +13832,33 @@
let isSolo = 1;
let mayStore = 1;
}
+def L6_movlen : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rd32 = movlen($Rs32,$Rtt32)",
+tc_5a4b5e58, TypeCR>, Enc_80296d, Requires<[HasV73]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
+def L6_pmemcpy : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, DoubleRegs:$Rtt32),
+"$Rdd32 = pmemcpy($Rx32,$Rtt32)",
+tc_af6af259, TypeLD>, Enc_c89067, Requires<[HasV73]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10011001111;
+let hasNewValue = 1;
+let opNewValue = 1;
+let mayLoad = 1;
+let isSolo = 1;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
def L6_return_map_to_raw : HInst<
(outs),
(ins),
@@ -26741,6 +26814,31 @@
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_dbl_ld0 : HInst<
+(outs HvxWR:$Vdd32),
+(ins IntRegs:$Rt32),
+"$Vdd32 = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV73]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVLoad = 1;
+let isCVI = 1;
+let mayLoad = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_dbl_st0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, HvxWR:$Vss32),
+"vmem($Rt32) = $Vss32",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV73]> {
+let isCVI = 1;
+let mayStore = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_extractw : HInst<
(outs IntRegs:$Rd32),
(ins HvxVR:$Vu32, IntRegs:$Rs32),
@@ -26752,6 +26850,8 @@
let hasNewValue = 1;
let opNewValue = 0;
let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
let isSolo = 1;
let mayLoad = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -27045,6 +27145,8 @@
let hasNewValue = 1;
let opNewValue = 0;
let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_or : HInst<
@@ -27470,6 +27572,7 @@
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ai";
@@ -27493,6 +27596,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ai";
@@ -27516,6 +27620,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ai";
@@ -27538,6 +27643,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_pi";
@@ -27560,6 +27666,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ppu";
@@ -27581,6 +27688,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_pi";
@@ -27603,6 +27711,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ppu";
@@ -27625,6 +27734,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ai";
@@ -27646,6
+27756,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_pi"; @@ -27667,6 +27778,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ppu"; @@ -27688,6 +27800,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; @@ -27709,6 +27822,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; @@ -27730,6 +27844,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; @@ -27750,6 +27865,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27774,6 +27890,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27798,6 +27915,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27821,6 +27939,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27844,6 +27963,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27866,6 +27986,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27889,6 +28010,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27912,6 +28034,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27934,6 +28057,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27956,6 +28080,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27978,6 +28103,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28000,6 +28126,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28022,6 +28149,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28043,6 +28171,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28066,6 +28195,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28089,6 +28219,7 @@ let accessSize = HVXVectorAccess; let 
isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28110,6 +28241,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28131,6 +28263,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28356,6 +28489,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; @@ -28378,6 +28512,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; @@ -28400,6 +28535,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; @@ -28420,6 +28556,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; @@ -28440,6 +28577,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; @@ -28641,6 +28779,64 @@ let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } +def V6_vL64b_ai : HInst< +(outs HvxWR:$Vdd32), +(ins IntRegs:$Rt32, s4_0Imm:$Ii), +"$Vdd32 = vmem($Rt32+#$Ii)", +tc_0390c1ca, TypeCVI_VM_LD>, Enc_634460, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b011; +let Inst{12-11} = 0b00; +let Inst{31-21} = 0b00101000010; +let hasNewValue = 1; +let opNewValue = 0; +let addrMode = BaseImmOffset; +let accessSize = HVXVectorAccess; +let isCVLoad = 1; +let isCVI = 1; +let isHVXALU = 1; +let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vL64b_pi : HInst< +(outs HvxWR:$Vdd32, IntRegs:$Rx32), +(ins IntRegs:$Rx32in, s3_0Imm:$Ii), +"$Vdd32 = vmem($Rx32++#$Ii)", +tc_9a1cab75, TypeCVI_VM_LD>, Enc_5eb169, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b011; +let Inst{13-11} = 0b000; +let Inst{31-21} = 0b00101001010; +let hasNewValue = 1; +let opNewValue = 0; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVLoad = 1; +let isCVI = 1; +let isHVXALU = 1; +let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} +def V6_vL64b_ppu : HInst< +(outs HvxWR:$Vdd32, IntRegs:$Rx32), +(ins IntRegs:$Rx32in, ModRegs:$Mu2), +"$Vdd32 = vmem($Rx32++$Mu2)", +tc_9a1cab75, TypeCVI_VM_LD>, Enc_829a68, Requires<[UseHVXV73]> { +let Inst{12-5} = 0b00000011; +let Inst{31-21} = 0b00101011010; +let hasNewValue = 1; +let opNewValue = 0; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVLoad = 1; +let isCVI = 1; +let isHVXALU = 1; +let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} def V6_vS32Ub_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), @@ -28802,6 +28998,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let CextOpcode = "V6_vS32b"; @@ -29007,6 +29204,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 
1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isNVStorable = 1; @@ -29025,6 +29223,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isNVStorable = 1; @@ -29043,6 +29242,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; @@ -29059,6 +29259,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -29073,6 +29274,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29087,6 +29289,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29102,6 +29305,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; @@ -29317,6 +29521,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; @@ -29336,6 +29541,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; @@ -29355,6 +29561,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; @@ -29372,6 +29579,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29387,6 +29595,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29402,6 +29611,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29418,6 +29628,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; @@ -29437,6 +29648,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; @@ -29456,6 +29668,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; @@ -29474,6 +29687,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; @@ -29492,6 +29706,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; @@ -29509,6 +29724,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace 
= "EXT_mmvec"; @@ -29524,6 +29740,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29539,6 +29756,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29555,6 +29773,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let CextOpcode = "V6_vS32b"; @@ -29573,6 +29792,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; @@ -29591,6 +29811,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isNVStorable = 1; @@ -29608,6 +29829,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isNVStorable = 1; @@ -29625,6 +29847,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; @@ -29641,6 +29864,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -29655,6 +29879,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29669,6 +29894,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29719,6 +29945,52 @@ let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } +def V6_vS64b_ai : HInst< +(outs), +(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxWR:$Vss32), +"vmem($Rt32+#$Ii) = $Vss32", +tc_9aff7a2a, TypeCVI_VM_ST>, Enc_b98b95, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b010; +let Inst{12-11} = 0b00; +let Inst{31-21} = 0b00101000011; +let addrMode = BaseImmOffset; +let accessSize = HVXVectorAccess; +let isCVI = 1; +let isHVXALU = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vS64b_pi : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxWR:$Vss32), +"vmem($Rx32++#$Ii) = $Vss32", +tc_227864f7, TypeCVI_VM_ST>, Enc_b025d6, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b010; +let Inst{13-11} = 0b000; +let Inst{31-21} = 0b00101001011; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVI = 1; +let isHVXALU = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} +def V6_vS64b_ppu : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxWR:$Vss32), +"vmem($Rx32++$Mu2) = $Vss32", +tc_227864f7, TypeCVI_VM_ST>, Enc_046afa, Requires<[UseHVXV73]> { +let Inst{12-5} = 0b00000010; +let Inst{31-21} = 0b00101011011; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVI = 1; +let isHVXALU = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} def V6_vabs_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -29756,6 +30028,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let 
isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsb_alt : HInst< @@ -29781,6 +30055,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsb_sat_alt : HInst< @@ -29906,6 +30182,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsh_alt : HInst< @@ -29931,6 +30209,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsh_sat_alt : HInst< @@ -29992,6 +30272,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsw_alt : HInst< @@ -30017,6 +30299,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsw_sat_alt : HInst< @@ -30122,6 +30406,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vadd_sf_bf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vadd($Vu32.bf,$Vv32.bf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vadd_sf_hf : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -30159,6 +30456,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddb_alt : HInst< @@ -30211,6 +30510,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30241,6 +30542,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30269,6 +30572,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddbsat_alt : HInst< @@ -30319,6 +30624,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30332,9 +30639,9 @@ let Inst{31-21} = 0b00011101101; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddcarrysat : HInst< @@ -30348,6 +30655,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddclbh : HInst< @@ -30387,6 +30696,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddh_alt : HInst< @@ -30439,6 +30750,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30469,6 +30782,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let 
Constraints = "$Vx32 = $Vx32in"; } @@ -30497,6 +30812,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddhsat_alt : HInst< @@ -30655,6 +30972,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddubsat_alt : HInst< @@ -30705,6 +31024,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vadduhsat : HInst< @@ -30718,6 +31039,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vadduhsat_alt : HInst< @@ -30822,6 +31145,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vadduwsat_alt : HInst< @@ -30872,6 +31197,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddw_alt : HInst< @@ -30924,6 +31251,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30954,6 +31283,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30982,6 +31313,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddwsat_alt : HInst< @@ -31057,6 +31390,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vandnqrt : HInst< @@ -31179,6 +31514,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vandvqv : HInst< @@ -31193,6 +31530,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vandvrt : HInst< @@ -31624,6 +31963,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrvuhubsat : HInst< @@ -31637,6 +31977,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrvwuhrndsat : HInst< @@ -31650,6 +31991,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrvwuhsat : HInst< @@ -31663,6 +32005,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrw : HInst< @@ -31820,6 +32163,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vassign_fp : HInst< @@ -31839,7 +32183,7 @@ (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.tmp = $Vu32", -tc_2120355e, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV69]> { +tc_e2fdd6e6, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV69]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -31871,6 +32215,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let 
isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgb_alt : HInst< @@ -31896,6 +32242,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgbrnd_alt : HInst< @@ -31921,6 +32269,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgh_alt : HInst< @@ -31946,6 +32296,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavghrnd_alt : HInst< @@ -31971,6 +32323,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgub_alt : HInst< @@ -31996,6 +32350,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgubrnd_alt : HInst< @@ -32021,6 +32377,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguh_alt : HInst< @@ -32046,6 +32404,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguhrnd_alt : HInst< @@ -32071,6 +32431,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguw_alt : HInst< @@ -32096,6 +32458,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguwrnd_alt : HInst< @@ -32121,6 +32485,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgw_alt : HInst< @@ -32146,6 +32512,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgwrnd_alt : HInst< @@ -32236,6 +32604,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vcombine : HInst< @@ -32256,7 +32625,7 @@ (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.tmp = vcombine($Vu32,$Vv32)", -tc_aa047364, TypeCVI_VX>, Enc_71bb9b, Requires<[UseHVXV69]> { +tc_531b383c, TypeCVI_VX>, Enc_71bb9b, Requires<[UseHVXV69]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -32266,6 +32635,32 @@ let hasHvxTmp = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vconv_h_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.h = $Vu32.hf", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_hf_h : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = $Vu32.h", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vconv_hf_qf16 : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -32305,6 +32700,32 @@ let isCVI = 1; let DecoderNamespace = 
"EXT_mmvec"; } +def V6_vconv_sf_w : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = $Vu32.w", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_w_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = $Vu32.sf", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vcvt_b_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -32318,6 +32739,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vcvt_bf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.bf = vcvt($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vcvt_h_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -33155,6 +33589,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_veqb_and : HInst< @@ -33166,6 +33602,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33179,6 +33617,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33191,6 +33631,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33205,6 +33647,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_veqh_and : HInst< @@ -33216,6 +33660,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33229,6 +33675,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33241,6 +33689,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33255,6 +33705,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_veqw_and : HInst< @@ -33266,6 +33718,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33279,6 +33733,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33291,6 +33747,8 @@ 
let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33384,6 +33842,7 @@ let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33400,6 +33859,7 @@ let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33448,6 +33908,7 @@ let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33464,6 +33925,7 @@ let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33479,6 +33941,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtb_and : HInst< @@ -33490,6 +33954,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33503,6 +33969,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33515,6 +33983,66 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtbf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b011110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgtbf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b110100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtbf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b001110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtbf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b111100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33529,6 +34057,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let 
isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgth_and : HInst< @@ -33540,6 +34070,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33553,6 +34085,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33565,6 +34099,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33579,6 +34115,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgthf_and : HInst< @@ -33590,6 +34128,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33603,6 +34143,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33615,6 +34157,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33629,6 +34173,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtsf_and : HInst< @@ -33640,6 +34186,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33653,6 +34201,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33665,6 +34215,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33679,6 +34231,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtub_and : HInst< @@ -33690,6 +34244,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33703,6 +34259,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33715,6 +34273,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33729,6 +34289,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtuh_and : HInst< @@ -33740,6 +34302,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } 
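The V73 conversions and bf16 compares defined above are ordinary one- and two-operand HVX instructions, so once the builtin-to-intrinsic plumbing in this patch is in place they are reachable straight from C. A minimal sketch for the single-vector conversions, assuming a 128-byte HVX configuration (e.g. -mcpu=hexagonv73 -mhvx -mhvx-length=128b); the typedef and the two helper names below are illustrative only and are not part of this patch:

/* Illustrative only: mirrors the 128-byte HVX vector type the Hexagon
   headers provide, kept local so the example is self-contained. */
typedef int HVX_Vector __attribute__((__vector_size__(128)));

/* $Vd32.w = $Vu32.sf -- exercises the V6_vconv_w_sf pattern added above. */
static HVX_Vector conv_sf_to_w(HVX_Vector v_sf) {
  return __builtin_HEXAGON_V6_vconv_w_sf_128B(v_sf);
}

/* $Vd32.sf = $Vu32.w -- V6_vconv_sf_w, the opposite direction. */
static HVX_Vector conv_w_to_sf(HVX_Vector v_w) {
  return __builtin_HEXAGON_V6_vconv_sf_w_128B(v_w);
}

The bf16 compares (V6_vgtbf and its and/or/xor accumulating forms) follow the same two-source shape but write a predicate register, so in practice their results are consumed through vmux or masked stores rather than read back directly.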
@@ -33753,6 +34317,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33765,6 +34331,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33779,6 +34347,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtuw_and : HInst< @@ -33790,6 +34360,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33803,6 +34375,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33815,6 +34389,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33829,6 +34405,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtw_and : HInst< @@ -33840,6 +34418,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33853,6 +34433,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33865,6 +34447,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -34187,6 +34771,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmax_bf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.bf = vmax($Vu32.bf,$Vv32.bf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmax_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34198,6 +34795,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmax_sf : HInst< @@ -34211,6 +34810,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxb : HInst< @@ -34224,6 +34825,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxb_alt : HInst< @@ -34249,6 +34852,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxh_alt : HInst< @@ -34274,6 +34879,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxub_alt : HInst< @@ -34299,6 +34906,8 @@ let hasNewValue = 1; 
let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxuh_alt : HInst< @@ -34324,6 +34933,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxw_alt : HInst< @@ -34338,6 +34949,19 @@ let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmin_bf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.bf = vmin($Vu32.bf,$Vv32.bf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmin_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34349,6 +34973,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmin_sf : HInst< @@ -34362,6 +34988,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminb : HInst< @@ -34375,6 +35003,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminb_alt : HInst< @@ -34400,6 +35030,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminh_alt : HInst< @@ -34425,6 +35057,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminub_alt : HInst< @@ -34450,6 +35084,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminuh_alt : HInst< @@ -34475,6 +35111,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminw_alt : HInst< @@ -34929,6 +35567,34 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpy_sf_bf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vmpy($Vu32.bf,$Vv32.bf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_sf_bf_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vxx32.sf += vmpy($Vu32.bf,$Vv32.bf)", +tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} def V6_vmpy_sf_hf : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -36216,6 +36882,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgb : HInst< @@ -36229,6 +36897,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgb_alt : HInst< @@ 
-36254,6 +36924,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgh_alt : HInst< @@ -36279,6 +36951,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgub_alt : HInst< @@ -36304,6 +36978,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgw_alt : HInst< @@ -36346,6 +37022,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnormamth : HInst< @@ -36409,6 +37086,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vor : HInst< @@ -36422,6 +37101,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vpackeb : HInst< @@ -37725,6 +38406,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsathub : HInst< @@ -37738,6 +38421,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsathub_alt : HInst< @@ -37763,6 +38448,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsatuwuh_alt : HInst< @@ -37788,6 +38475,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsatwh_alt : HInst< @@ -37836,6 +38525,8 @@ let Inst{31-21} = 0b00101111001; let accessSize = HalfWordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37849,6 +38540,8 @@ let accessSize = HalfWordAccess; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37882,6 +38575,8 @@ let Inst{31-21} = 0b00101111100; let accessSize = HalfWordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37941,6 +38636,8 @@ let Inst{31-21} = 0b00101111001; let accessSize = WordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37954,6 +38651,8 @@ let accessSize = WordAccess; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -38018,6 +38717,8 @@ let Inst{31-21} = 0b00101111100; let accessSize = WordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -38067,6 +38768,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vshufeh_alt : HInst< @@ -38133,6 +38836,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vshuffeb_alt : HInst< @@ -38183,6 +38888,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; 
} def V6_vshuffob_alt : HInst< @@ -38271,6 +38978,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vshufoh_alt : HInst< @@ -38376,6 +39085,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_sf_bf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vsub($Vu32.bf,$Vv32.bf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_sf_hf : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -38413,6 +39135,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubb_alt : HInst< @@ -38464,6 +39188,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38492,6 +39218,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38519,6 +39247,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubbsat_alt : HInst< @@ -38569,6 +39299,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -38582,9 +39314,9 @@ let Inst{31-21} = 0b00011101101; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubh : HInst< @@ -38598,6 +39330,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubh_alt : HInst< @@ -38649,6 +39383,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38677,6 +39413,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38704,6 +39442,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubhsat_alt : HInst< @@ -38804,6 +39544,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsububsat_alt : HInst< @@ -38854,6 +39596,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubuhsat : HInst< @@ -38867,6 +39611,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubuhsat_alt : HInst< @@ -38942,6 +39688,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubuwsat_alt : HInst< @@ -38992,6 +39740,8 @@ let hasNewValue = 1; 
let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubw_alt : HInst< @@ -39043,6 +39793,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -39071,6 +39823,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -39098,6 +39852,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubwsat_alt : HInst< @@ -39581,6 +40337,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vzb : HInst< @@ -39774,11 +40532,24 @@ let Inst{31-16} = 0b0110110000100000; let isSolo = 1; } +def Y2_crswap0 : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in), +"crswap($Rx32,sgp0)", +tc_7dc63b5c, TypeCR>, Enc_403871 { +let Inst{13-0} = 0b00000000000000; +let Inst{31-21} = 0b01100101000; +let hasNewValue = 1; +let opNewValue = 0; +let Uses = [SGP0]; +let Defs = [SGP0]; +let Constraints = "$Rx32 = $Rx32in"; +} def Y2_crswap_old : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in), "crswap($Rx32,sgp)", -PSEUDO, TypeMAPPING> { +tc_7dc63b5c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -39918,6 +40689,30 @@ let Inst{31-21} = 0b01100100010; let isSolo = 1; } +def Y4_crswap1 : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in), +"crswap($Rx32,sgp1)", +tc_7dc63b5c, TypeCR>, Enc_403871 { +let Inst{13-0} = 0b00000000000000; +let Inst{31-21} = 0b01100101001; +let hasNewValue = 1; +let opNewValue = 0; +let Uses = [SGP1]; +let Defs = [SGP1]; +let Constraints = "$Rx32 = $Rx32in"; +} +def Y4_crswap10 : HInst< +(outs DoubleRegs:$Rxx32), +(ins DoubleRegs:$Rxx32in, sgp10Const:$sgp10), +"crswap($Rxx32,$sgp10)", +tc_27106296, TypeCR>, Enc_d0fe02 { +let Inst{13-0} = 0b00000000000000; +let Inst{31-21} = 0b01101101100; +let Uses = [SGP0, SGP1]; +let Defs = [SGP0, SGP1]; +let Constraints = "$Rxx32 = $Rxx32in"; +} def Y4_l2fetch : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), @@ -40102,7 +40897,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32)", -tc_388f9897, TypeALU32_3op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_3op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40112,7 +40907,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,#$Ii)", -tc_388f9897, TypeALU32_ADDI>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_ADDI>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40127,7 +40922,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = and($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40142,7 +40937,7 @@ (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, s8_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40155,7 +40950,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = 
sxtb($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40165,7 +40960,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40175,7 +40970,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = $Rs32", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40185,7 +40980,7 @@ (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii), "$Rd32 = #$Ii", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40200,7 +40995,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxtb($Rs32)", -PSEUDO, TypeMAPPING>, Requires<[HasV69]> { +PSEUDO, TypeMAPPING>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40210,7 +41005,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40220,7 +41015,7 @@ (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40233,7 +41028,7 @@ (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rdd32 = combine(#$Ii,$Rs32)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40246,7 +41041,7 @@ (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rdd32 = combine($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40259,7 +41054,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -40276,7 +41071,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -40292,7 +41087,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -40310,7 +41105,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -40327,7 +41122,7 @@ (outs PredRegs:$Pd4), (ins 
IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.eq($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40340,7 +41135,7 @@ (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = deallocframe($Rs32):raw", -tc_aee6250c, TypeLD>, Requires<[HasV69]> { +tc_aee6250c, TypeLD>, Requires<[HasV73]> { let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; let mayLoad = 1; @@ -40352,7 +41147,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memb($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40370,7 +41165,7 @@ (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s29_3Imm:$Ii), "$Rdd32 = memd($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -40386,7 +41181,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40404,7 +41199,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rd32 = memw($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40422,7 +41217,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memub($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40440,7 +41235,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memuh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40458,7 +41253,7 @@ (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u11_3Imm:$Ii), "allocframe($Rx32,#$Ii):raw", -tc_74a42bda, TypeST>, Requires<[HasV69]> { +tc_74a42bda, TypeST>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40474,7 +41269,7 @@ (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -40490,7 +41285,7 @@ (outs), (ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+#$Ii) = $Rtt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -40506,7 +41301,7 @@ (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let AsmVariantName = "NonParsable"; @@ -40522,7 +41317,7 @@ (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = 
BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; @@ -40538,7 +41333,7 @@ (outs), (ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "memb($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -40554,7 +41349,7 @@ (outs), (ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "memw($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; Index: llvm/lib/Target/Hexagon/HexagonDepInstrIntrinsics.inc =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepInstrIntrinsics.inc +++ llvm/lib/Target/Hexagon/HexagonDepInstrIntrinsics.inc @@ -1015,6 +1015,7 @@ {Hexagon::V6_vadd_qf32, Intrinsic::hexagon_V6_vadd_qf32, Intrinsic::hexagon_V6_vadd_qf32_128B}, {Hexagon::V6_vadd_qf32_mix, Intrinsic::hexagon_V6_vadd_qf32_mix, Intrinsic::hexagon_V6_vadd_qf32_mix_128B}, {Hexagon::V6_vadd_sf, Intrinsic::hexagon_V6_vadd_sf, Intrinsic::hexagon_V6_vadd_sf_128B}, +{Hexagon::V6_vadd_sf_bf, Intrinsic::hexagon_V6_vadd_sf_bf, Intrinsic::hexagon_V6_vadd_sf_bf_128B}, {Hexagon::V6_vadd_sf_hf, Intrinsic::hexagon_V6_vadd_sf_hf, Intrinsic::hexagon_V6_vadd_sf_hf_128B}, {Hexagon::V6_vadd_sf_sf, Intrinsic::hexagon_V6_vadd_sf_sf, Intrinsic::hexagon_V6_vadd_sf_sf_128B}, {Hexagon::V6_vaddb, Intrinsic::hexagon_V6_vaddb, Intrinsic::hexagon_V6_vaddb_128B}, @@ -1112,10 +1113,15 @@ {Hexagon::V6_vcl0h, Intrinsic::hexagon_V6_vcl0h, Intrinsic::hexagon_V6_vcl0h_128B}, {Hexagon::V6_vcl0w, Intrinsic::hexagon_V6_vcl0w, Intrinsic::hexagon_V6_vcl0w_128B}, {Hexagon::V6_vcombine, Intrinsic::hexagon_V6_vcombine, Intrinsic::hexagon_V6_vcombine_128B}, +{Hexagon::V6_vconv_h_hf, Intrinsic::hexagon_V6_vconv_h_hf, Intrinsic::hexagon_V6_vconv_h_hf_128B}, +{Hexagon::V6_vconv_hf_h, Intrinsic::hexagon_V6_vconv_hf_h, Intrinsic::hexagon_V6_vconv_hf_h_128B}, {Hexagon::V6_vconv_hf_qf16, Intrinsic::hexagon_V6_vconv_hf_qf16, Intrinsic::hexagon_V6_vconv_hf_qf16_128B}, {Hexagon::V6_vconv_hf_qf32, Intrinsic::hexagon_V6_vconv_hf_qf32, Intrinsic::hexagon_V6_vconv_hf_qf32_128B}, {Hexagon::V6_vconv_sf_qf32, Intrinsic::hexagon_V6_vconv_sf_qf32, Intrinsic::hexagon_V6_vconv_sf_qf32_128B}, +{Hexagon::V6_vconv_sf_w, Intrinsic::hexagon_V6_vconv_sf_w, Intrinsic::hexagon_V6_vconv_sf_w_128B}, +{Hexagon::V6_vconv_w_sf, Intrinsic::hexagon_V6_vconv_w_sf, Intrinsic::hexagon_V6_vconv_w_sf_128B}, {Hexagon::V6_vcvt_b_hf, Intrinsic::hexagon_V6_vcvt_b_hf, Intrinsic::hexagon_V6_vcvt_b_hf_128B}, +{Hexagon::V6_vcvt_bf_sf, Intrinsic::hexagon_V6_vcvt_bf_sf, Intrinsic::hexagon_V6_vcvt_bf_sf_128B}, {Hexagon::V6_vcvt_h_hf, Intrinsic::hexagon_V6_vcvt_h_hf, Intrinsic::hexagon_V6_vcvt_h_hf_128B}, {Hexagon::V6_vcvt_hf_b, Intrinsic::hexagon_V6_vcvt_hf_b, Intrinsic::hexagon_V6_vcvt_hf_b_128B}, {Hexagon::V6_vcvt_hf_h, Intrinsic::hexagon_V6_vcvt_hf_h, Intrinsic::hexagon_V6_vcvt_hf_h_128B}, @@ -1182,6 +1188,10 @@ {Hexagon::V6_vgtb_and, Intrinsic::hexagon_V6_vgtb_and, Intrinsic::hexagon_V6_vgtb_and_128B}, {Hexagon::V6_vgtb_or, Intrinsic::hexagon_V6_vgtb_or, Intrinsic::hexagon_V6_vgtb_or_128B}, {Hexagon::V6_vgtb_xor, Intrinsic::hexagon_V6_vgtb_xor, Intrinsic::hexagon_V6_vgtb_xor_128B}, +{Hexagon::V6_vgtbf, Intrinsic::hexagon_V6_vgtbf, Intrinsic::hexagon_V6_vgtbf_128B}, +{Hexagon::V6_vgtbf_and, Intrinsic::hexagon_V6_vgtbf_and, 
Intrinsic::hexagon_V6_vgtbf_and_128B}, +{Hexagon::V6_vgtbf_or, Intrinsic::hexagon_V6_vgtbf_or, Intrinsic::hexagon_V6_vgtbf_or_128B}, +{Hexagon::V6_vgtbf_xor, Intrinsic::hexagon_V6_vgtbf_xor, Intrinsic::hexagon_V6_vgtbf_xor_128B}, {Hexagon::V6_vgth, Intrinsic::hexagon_V6_vgth, Intrinsic::hexagon_V6_vgth_128B}, {Hexagon::V6_vgth_and, Intrinsic::hexagon_V6_vgth_and, Intrinsic::hexagon_V6_vgth_and_128B}, {Hexagon::V6_vgth_or, Intrinsic::hexagon_V6_vgth_or, Intrinsic::hexagon_V6_vgth_or_128B}, @@ -1229,6 +1239,7 @@ {Hexagon::V6_vlutvwh_oracc, Intrinsic::hexagon_V6_vlutvwh_oracc, Intrinsic::hexagon_V6_vlutvwh_oracc_128B}, {Hexagon::V6_vlutvwh_oracci, Intrinsic::hexagon_V6_vlutvwh_oracci, Intrinsic::hexagon_V6_vlutvwh_oracci_128B}, {Hexagon::V6_vlutvwhi, Intrinsic::hexagon_V6_vlutvwhi, Intrinsic::hexagon_V6_vlutvwhi_128B}, +{Hexagon::V6_vmax_bf, Intrinsic::hexagon_V6_vmax_bf, Intrinsic::hexagon_V6_vmax_bf_128B}, {Hexagon::V6_vmax_hf, Intrinsic::hexagon_V6_vmax_hf, Intrinsic::hexagon_V6_vmax_hf_128B}, {Hexagon::V6_vmax_sf, Intrinsic::hexagon_V6_vmax_sf, Intrinsic::hexagon_V6_vmax_sf_128B}, {Hexagon::V6_vmaxb, Intrinsic::hexagon_V6_vmaxb, Intrinsic::hexagon_V6_vmaxb_128B}, @@ -1236,6 +1247,7 @@ {Hexagon::V6_vmaxub, Intrinsic::hexagon_V6_vmaxub, Intrinsic::hexagon_V6_vmaxub_128B}, {Hexagon::V6_vmaxuh, Intrinsic::hexagon_V6_vmaxuh, Intrinsic::hexagon_V6_vmaxuh_128B}, {Hexagon::V6_vmaxw, Intrinsic::hexagon_V6_vmaxw, Intrinsic::hexagon_V6_vmaxw_128B}, +{Hexagon::V6_vmin_bf, Intrinsic::hexagon_V6_vmin_bf, Intrinsic::hexagon_V6_vmin_bf_128B}, {Hexagon::V6_vmin_hf, Intrinsic::hexagon_V6_vmin_hf, Intrinsic::hexagon_V6_vmin_hf_128B}, {Hexagon::V6_vmin_sf, Intrinsic::hexagon_V6_vmin_sf, Intrinsic::hexagon_V6_vmin_sf_128B}, {Hexagon::V6_vminb, Intrinsic::hexagon_V6_vminb, Intrinsic::hexagon_V6_vminb_128B}, @@ -1266,6 +1278,8 @@ {Hexagon::V6_vmpy_qf32_mix_hf, Intrinsic::hexagon_V6_vmpy_qf32_mix_hf, Intrinsic::hexagon_V6_vmpy_qf32_mix_hf_128B}, {Hexagon::V6_vmpy_qf32_qf16, Intrinsic::hexagon_V6_vmpy_qf32_qf16, Intrinsic::hexagon_V6_vmpy_qf32_qf16_128B}, {Hexagon::V6_vmpy_qf32_sf, Intrinsic::hexagon_V6_vmpy_qf32_sf, Intrinsic::hexagon_V6_vmpy_qf32_sf_128B}, +{Hexagon::V6_vmpy_sf_bf, Intrinsic::hexagon_V6_vmpy_sf_bf, Intrinsic::hexagon_V6_vmpy_sf_bf_128B}, +{Hexagon::V6_vmpy_sf_bf_acc, Intrinsic::hexagon_V6_vmpy_sf_bf_acc, Intrinsic::hexagon_V6_vmpy_sf_bf_acc_128B}, {Hexagon::V6_vmpy_sf_hf, Intrinsic::hexagon_V6_vmpy_sf_hf, Intrinsic::hexagon_V6_vmpy_sf_hf_128B}, {Hexagon::V6_vmpy_sf_hf_acc, Intrinsic::hexagon_V6_vmpy_sf_hf_acc, Intrinsic::hexagon_V6_vmpy_sf_hf_acc_128B}, {Hexagon::V6_vmpy_sf_sf, Intrinsic::hexagon_V6_vmpy_sf_sf, Intrinsic::hexagon_V6_vmpy_sf_sf_128B}, @@ -1399,6 +1413,7 @@ {Hexagon::V6_vsub_qf32, Intrinsic::hexagon_V6_vsub_qf32, Intrinsic::hexagon_V6_vsub_qf32_128B}, {Hexagon::V6_vsub_qf32_mix, Intrinsic::hexagon_V6_vsub_qf32_mix, Intrinsic::hexagon_V6_vsub_qf32_mix_128B}, {Hexagon::V6_vsub_sf, Intrinsic::hexagon_V6_vsub_sf, Intrinsic::hexagon_V6_vsub_sf_128B}, +{Hexagon::V6_vsub_sf_bf, Intrinsic::hexagon_V6_vsub_sf_bf, Intrinsic::hexagon_V6_vsub_sf_bf_128B}, {Hexagon::V6_vsub_sf_hf, Intrinsic::hexagon_V6_vsub_sf_hf, Intrinsic::hexagon_V6_vsub_sf_hf_128B}, {Hexagon::V6_vsub_sf_sf, Intrinsic::hexagon_V6_vsub_sf_sf, Intrinsic::hexagon_V6_vsub_sf_sf_128B}, {Hexagon::V6_vsubb, Intrinsic::hexagon_V6_vsubb, Intrinsic::hexagon_V6_vsubb_128B}, Index: llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td =================================================================== --- 
llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3677,3 +3677,66 @@ (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV69, UseHVX64B]>; def: Pat<(int_hexagon_V6_vmpyuhvs_128B HvxVR:$src1, HvxVR:$src2), (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV69, UseHVX128B]>; + +// V73 HVX Instructions. + +def: Pat<(int_hexagon_V6_vadd_sf_bf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vadd_sf_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_h_hf HvxVR:$src1), + (V6_vconv_h_hf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_128B HvxVR:$src1), + (V6_vconv_h_hf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_hf_h HvxVR:$src1), + (V6_vconv_hf_h HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_hf_h_128B HvxVR:$src1), + (V6_vconv_hf_h HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_sf_w HvxVR:$src1), + (V6_vconv_sf_w HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_sf_w_128B HvxVR:$src1), + (V6_vconv_sf_w HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_w_sf HvxVR:$src1), + (V6_vconv_w_sf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_w_sf_128B HvxVR:$src1), + (V6_vconv_w_sf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_bf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_bf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_bf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_bf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vgtbf HvxVR:$src1, HvxVR:$src2), + (V6_vgtbf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vgtbf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_bf HvxVR:$src1, HvxVR:$src2), + (V6_vmax_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmax_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmax_bf HvxVR:$src1, 
HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmin_bf HvxVR:$src1, HvxVR:$src2), + (V6_vmin_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmin_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmin_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_bf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_bf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_sf_bf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_sf_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; Index: llvm/lib/Target/Hexagon/HexagonDepMappings.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepMappings.td +++ llvm/lib/Target/Hexagon/HexagonDepMappings.td @@ -165,6 +165,8 @@ def V6_MAP_equw_andAlias : InstAlias<"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; def V6_MAP_equw_iorAlias : InstAlias<"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; def V6_MAP_equw_xorAlias : InstAlias<"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_dbl_ld0Alias : InstAlias<"$Vdd32 = vmem($Rt32)", (V6_vL64b_ai HvxWR:$Vdd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_dbl_st0Alias : InstAlias<"vmem($Rt32) = $Vss32", (V6_vS64b_ai IntRegs:$Rt32, 0, HvxWR:$Vss32)>, Requires<[UseHVX]>; def V6_extractw_altAlias : InstAlias<"$Rd32.w = vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, HvxVR:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>; def V6_ld0Alias : InstAlias<"$Vd32 = vmem($Rt32)", (V6_vL32b_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_ldcnp0Alias : InstAlias<"if (!$Pv4) $Vd32.cur = vmem($Rt32)", (V6_vL32b_cur_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; @@ -471,4 +473,5 @@ def V6_vzh_altAlias : InstAlias<"$Vdd32 = vzxth($Vu32)", (V6_vzh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; def V6_zld0Alias : InstAlias<"z = vmem($Rt32)", (V6_zLd_ai IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_zldp0Alias : InstAlias<"if ($Pv4) z = vmem($Rt32)", (V6_zLd_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def Y2_crswap_oldAlias : InstAlias<"crswap($Rx32,sgp)", (Y2_crswap0 IntRegs:$Rx32)>; def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>; Index: llvm/lib/Target/Hexagon/HexagonDepMask.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepMask.h +++ llvm/lib/Target/Hexagon/HexagonDepMask.h @@ -8,7 +8,6 @@ // Automatically generated file, do not edit! 
//===----------------------------------------------------------------------===// - #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H @@ -2817,4 +2816,4 @@ 0 } }; -#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H \ No newline at end of file Index: llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h +++ llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h @@ -8,7 +8,6 @@ // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// - #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H @@ -53,9 +52,11 @@ case Hexagon::Sched::tc_c57d9f39: case Hexagon::Sched::tc_d33e5eee: case Hexagon::Sched::tc_decdde8a: + case Hexagon::Sched::tc_e60def48: case Hexagon::Sched::tc_ed03645c: case Hexagon::Sched::tc_eeda4109: case Hexagon::Sched::tc_ef921005: + case Hexagon::Sched::tc_f97707c1: case Hexagon::Sched::tc_f999c66e: return true; default: @@ -97,6 +98,7 @@ inline bool is_TC2early(unsigned SchedClass) { switch (SchedClass) { + case Hexagon::Sched::tc_33e7e673: case Hexagon::Sched::tc_45f9d1be: case Hexagon::Sched::tc_a4ee89db: return true; @@ -110,6 +112,7 @@ case Hexagon::Sched::tc_01e1be3b: case Hexagon::Sched::tc_1248597c: case Hexagon::Sched::tc_197dce51: + case Hexagon::Sched::tc_27106296: case Hexagon::Sched::tc_28e55c6f: case Hexagon::Sched::tc_2c3e17fc: case Hexagon::Sched::tc_38382228: @@ -121,6 +124,7 @@ case Hexagon::Sched::tc_6ae3426b: case Hexagon::Sched::tc_6d861a95: case Hexagon::Sched::tc_788b1d09: + case Hexagon::Sched::tc_7dc63b5c: case Hexagon::Sched::tc_7f8ae742: case Hexagon::Sched::tc_9406230a: case Hexagon::Sched::tc_a154b476: @@ -153,4 +157,4 @@ } } // namespace llvm -#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H \ No newline at end of file Index: llvm/lib/Target/Hexagon/HexagonInstrFormats.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -166,6 +166,15 @@ bit isCVI = 0; let TSFlags{59} = isCVI; + bit isHVXALU = 0; + let TSFlags{60} = isHVXALU; + + bit isHVXALU2SRC = 0; + let TSFlags{61} = isHVXALU2SRC; + + bit hasUnaryRestriction = 0; + let TSFlags{62} = hasUnaryRestriction; + // Fields used for relation models. bit isNonTemporal = 0; string isNT = ""; // set to "true" for non-temporal vector stores. 
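Note on the TSFlags bits added above: isHVXALU, isHVXALU2SRC, and hasUnaryRestriction occupy TSFlags bits 60-62 and pair with the Pos/Mask enum values added to HexagonBaseInfo.h later in this patch. As a minimal sketch of how such a flag is typically queried, assuming the usual HexagonII shift-and-mask idiom (the isHVXALU helper below is illustrative only, not part of this patch):

  #include <cstdint>

  namespace HexagonII {
  // Mirrors the enum values added to HexagonBaseInfo.h by this patch.
  enum {
    isHVXALUPos = 60,
    isHVXALUMask = 0x1,
  };
  } // namespace HexagonII

  // Shift the packed TSFlags word down to the bit position, then mask off
  // the single property bit.
  inline bool isHVXALU(uint64_t TSFlags) {
    return (TSFlags >> HexagonII::isHVXALUPos) & HexagonII::isHVXALUMask;
  }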
Index: llvm/lib/Target/Hexagon/HexagonOperands.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonOperands.td +++ llvm/lib/Target/Hexagon/HexagonOperands.td @@ -23,10 +23,13 @@ int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); }]>; + def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm"; let RenderMethod = "addImmOperands"; } def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; } def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; } def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; } +def sgp10ConstOperand : AsmOperandClass { let Name = "sgp10Const"; } +def sgp10Const : Operand<i32> { let ParserMatchClass = sgp10ConstOperand; } def bblabel : Operand<OtherVT>; def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; Index: llvm/lib/Target/Hexagon/HexagonSchedule.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonSchedule.td +++ llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -70,3 +70,6 @@ include "HexagonScheduleV67T.td" include "HexagonScheduleV68.td" include "HexagonScheduleV69.td" +include "HexagonScheduleV71.td" +include "HexagonScheduleV71T.td" +include "HexagonScheduleV73.td" Index: llvm/lib/Target/Hexagon/HexagonScheduleV71.td =================================================================== --- /dev/null +++ llvm/lib/Target/Hexagon/HexagonScheduleV71.td @@ -0,0 +1,38 @@ +//=-HexagonScheduleV71.td - HexagonV71 Scheduling Definitions *- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// ScalarItin and HVXItin contain some old itineraries still used by a handful +// of instructions. Hopefully, we will be able to get rid of them soon. +def HexagonV71ItinList : DepScalarItinV71, ScalarItin, + DepHVXItinV71, HVXItin, PseudoItin { + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV71_list, ScalarItin_list, + DepHVXItinV71_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV71 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV71ItinList.ItinList>; + +def HexagonModelV71 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV71; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V71 Resource Definitions - +//===----------------------------------------------------------------------===// + Index: llvm/lib/Target/Hexagon/HexagonScheduleV71T.td =================================================================== --- /dev/null +++ llvm/lib/Target/Hexagon/HexagonScheduleV71T.td @@ -0,0 +1,58 @@ +//=-HexagonScheduleV71T.td - Hexagon V71 Tiny Core Scheduling Definition ----=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// +//===----------------------------------------------------------------------===// + +class HexagonV71TPseudoItin { + list<InstrItinData> V71TPseudoItin_list = [ + InstrItinData<PSEUDO, [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>], + [2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData<DUPLEX, [InstrStage<1, [SLOT0]>], + [2, 1, 1]>, + InstrItinData<tc_ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>], [2]> + ]; +} + +// +// HVXItin contains some old itineraries still used by a handful of +// instructions. Hopefully, we will be able to get rid of them soon. +def HexagonV71TItinList : DepScalarItinV71T, DepHVXItinV71, HVXItin, + HexagonV71TPseudoItin { + list<InstrItinData> V71TItin_list = [ + InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>], + [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>], + [1, 1, 3, 3], + [Hex_FWD, Hex_FWD]> + ]; + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV71T_list, V71TItin_list, DepHVXItinV71_list, + HVXItin_list, V71TPseudoItin_list); +} + +def HexagonItinerariesV71T : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV71TItinList.ItinList>; + +def HexagonModelV71T : SchedMachineModel { + let IssueWidth = 3; + let Itineraries = HexagonItinerariesV71T; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V71 Tiny Core Resource Definitions - +//===----------------------------------------------------------------------===// Index: llvm/lib/Target/Hexagon/HexagonScheduleV73.td =================================================================== --- /dev/null +++ llvm/lib/Target/Hexagon/HexagonScheduleV73.td @@ -0,0 +1,38 @@ +//=-HexagonScheduleV73.td - HexagonV73 Scheduling Definitions *- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// ScalarItin and HVXItin contain some old itineraries still used by a handful +// of instructions. Hopefully, we will be able to get rid of them soon. +def HexagonV73ItinList : DepScalarItinV73, ScalarItin, + DepHVXItinV73, HVXItin, PseudoItin { + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV73_list, ScalarItin_list, + DepHVXItinV73_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV73 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV73ItinList.ItinList>; + +def HexagonModelV73 : SchedMachineModel { + // Max issue per cycle == bundle width.
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV73; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V73 Resource Definitions - +//===----------------------------------------------------------------------===// + Index: llvm/lib/Target/Hexagon/HexagonSubtarget.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -95,11 +95,11 @@ enum HexagonProcFamilyEnum { Others, TinyCore }; std::string CPUString; + HexagonProcFamilyEnum HexagonProcFamily = Others; Triple TargetTriple; // The following objects can use the TargetTriple, so they must be // declared after it. - HexagonProcFamilyEnum HexagonProcFamily = Others; HexagonInstrInfo InstrInfo; HexagonRegisterInfo RegInfo; HexagonTargetLowering TLInfo; @@ -198,6 +198,18 @@ bool hasV69OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V69; } + bool hasV71Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V71; + } + bool hasV71OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V71; + } + bool hasV73Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V73; + } + bool hasV73OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V73; + } bool useAudioOps() const { return UseAudioOps; } bool useCompound() const { return UseCompound; } @@ -243,6 +255,12 @@ bool useHVXV69Ops() const { return HexagonHVXVersion >= Hexagon::ArchEnum::V69; } + bool useHVXV71Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V71; + } + bool useHVXV73Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V73; + } bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; } bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; } Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -162,6 +162,15 @@ isCVIPos = 59, isCVIMask = 0x1, + + isHVXALUPos = 60, + isHVXALUMask = 0x1, + + isHVXALU2SRCPos = 61, + isHVXALU2SRCMask = 0x1, + + hasUnaryRestrictionPos = 62, + hasUnaryRestrictionMask = 0x1, }; // *** The code above must match HexagonInstrFormat*.td *** // Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -84,24 +84,30 @@ cl::init(false)); cl::opt<bool> MV69("mv69", cl::Hidden, cl::desc("Build for Hexagon V69"), cl::init(false)); - -cl::opt<Hexagon::ArchEnum> - EnableHVX("mhvx", - cl::desc("Enable Hexagon Vector eXtensions"), - cl::values( - clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"), - clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"), - clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"), - clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"), - clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"), - clEnumValN(Hexagon::ArchEnum::V68, "v68", "Build for HVX v68"), - clEnumValN(Hexagon::ArchEnum::V69, "v69", "Build for HVX v69"), - // Sentinel for no value specified. - clEnumValN(Hexagon::ArchEnum::Generic, "", "")), - // Sentinel for flag not present.
- cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional); +cl::opt<bool> MV71("mv71", cl::Hidden, cl::desc("Build for Hexagon V71"), + cl::init(false)); +cl::opt<bool> MV71T("mv71t", cl::Hidden, cl::desc("Build for Hexagon V71T"), + cl::init(false)); +cl::opt<bool> MV73("mv73", cl::Hidden, cl::desc("Build for Hexagon V73"), + cl::init(false)); } // namespace +cl::opt<Hexagon::ArchEnum> EnableHVX( + "mhvx", cl::desc("Enable Hexagon Vector eXtensions"), + cl::values(clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"), + clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"), + clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"), + clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"), + clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"), + clEnumValN(Hexagon::ArchEnum::V68, "v68", "Build for HVX v68"), + clEnumValN(Hexagon::ArchEnum::V69, "v69", "Build for HVX v69"), + clEnumValN(Hexagon::ArchEnum::V71, "v71", "Build for HVX v71"), + clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"), + // Sentinel for no value specified. + clEnumValN(Hexagon::ArchEnum::Generic, "", "")), + // Sentinel for flag not present. + cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional); + static cl::opt<bool> DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions")); @@ -135,6 +141,12 @@ return "hexagonv68"; if (MV69) return "hexagonv69"; + if (MV71) + return "hexagonv71"; + if (MV71T) + return "hexagonv71t"; + if (MV73) + return "hexagonv73"; return ""; } @@ -143,10 +155,10 @@ if (!ArchV.empty() && !CPU.empty()) { // Tiny cores have a "t" suffix that is discarded when creating a secondary // non-tiny subtarget. See: addArchSubtarget - std::pair<StringRef, StringRef> ArchP = ArchV.split('t'); - std::pair<StringRef, StringRef> CPUP = CPU.split('t'); + std::pair<StringRef, StringRef> ArchP = ArchV.split('t'); + std::pair<StringRef, StringRef> CPUP = CPU.split('t'); if (!ArchP.first.equals(CPUP.first)) - report_fatal_error("conflicting architectures specified."); + report_fatal_error("conflicting architectures specified."); return CPU; } if (ArchV.empty()) { @@ -391,6 +403,12 @@ case Hexagon::ArchEnum::V69: Result.push_back("+hvxv69"); break; + case Hexagon::ArchEnum::V71: + Result.push_back("+hvxv71"); + break; + case Hexagon::ArchEnum::V73: + Result.push_back("+hvxv73"); + break; case Hexagon::ArchEnum::Generic:{ Result.push_back(StringSwitch<StringRef>(CPU) .Case("hexagonv60", "+hvxv60") .Case("hexagonv62", "+hvxv62") .Case("hexagonv65", "+hvxv65") .Case("hexagonv66", "+hvxv66") .Case("hexagonv67", "+hvxv67") .Case("hexagonv67t", "+hvxv67") .Case("hexagonv68", "+hvxv68") - .Case("hexagonv69", "+hvxv69")); + .Case("hexagonv69", "+hvxv69") + .Case("hexagonv71", "+hvxv71") + .Case("hexagonv71t", "+hvxv71") + .Case("hexagonv73", "+hvxv73")); break; } case Hexagon::ArchEnum::NoArch: @@ -448,8 +469,8 @@ // turns on hvxvNN, corresponding to the existing ArchVNN. FeatureBitset FB = S; unsigned CpuArch = ArchV5; - for (unsigned F : {ArchV69, ArchV68, ArchV67, ArchV66, ArchV65, ArchV62, - ArchV60, ArchV55, ArchV5}) { + for (unsigned F : {ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66, + ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { if (!FB.test(F)) continue; CpuArch = F; @@ -465,7 +486,7 @@ bool HasHvxVer = false; for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68, - ExtensionHVXV69}) { + ExtensionHVXV69, ExtensionHVXV71, ExtensionHVXV73}) { if (!FB.test(F)) continue; HasHvxVer = true; @@ -478,27 +499,33 @@ // HasHvxVer is false, and UseHvx is true.
switch (CpuArch) { + case ArchV73: + FB.set(ExtensionHVXV73); + [[fallthrough]]; + case ArchV71: + FB.set(ExtensionHVXV71); + [[fallthrough]]; case ArchV69: FB.set(ExtensionHVXV69); [[fallthrough]]; - case ArchV68: - FB.set(ExtensionHVXV68); - [[fallthrough]]; - case ArchV67: - FB.set(ExtensionHVXV67); - [[fallthrough]]; - case ArchV66: - FB.set(ExtensionHVXV66); - [[fallthrough]]; - case ArchV65: - FB.set(ExtensionHVXV65); - [[fallthrough]]; - case ArchV62: - FB.set(ExtensionHVXV62); - [[fallthrough]]; - case ArchV60: - FB.set(ExtensionHVXV60); - break; + case ArchV68: + FB.set(ExtensionHVXV68); + [[fallthrough]]; + case ArchV67: + FB.set(ExtensionHVXV67); + [[fallthrough]]; + case ArchV66: + FB.set(ExtensionHVXV66); + [[fallthrough]]; + case ArchV65: + FB.set(ExtensionHVXV65); + [[fallthrough]]; + case ArchV62: + FB.set(ExtensionHVXV62); + [[fallthrough]]; + case ArchV60: + FB.set(ExtensionHVXV60); + break; } return FB; } @@ -512,11 +539,11 @@ MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl( TT, CPUName, /*TuneCPU*/ CPUName, ArchFS); - if (X != nullptr && (CPUName == "hexagonv67t")) + if (X != nullptr && (CPUName == "hexagonv67t" || CPUName == "hexagonv71t")) addArchSubtarget(X, ArchFS); if (CPU.equals("help")) - exit(0); + exit(0); if (!isCPUValid(CPUName.str())) { errs() << "error: invalid CPU \"" << CPUName.str().c_str() @@ -552,8 +579,7 @@ return X; } -void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, - StringRef FS) { +void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) { assert(STI != nullptr); if (STI->getCPU().contains("t")) { auto ArchSTI = createHexagonMCSubtargetInfo( @@ -577,7 +603,10 @@ .Case("hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67) .Case("hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T) .Case("hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68) - .Case("hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69); + .Case("hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69) + .Case("hexagonv71", llvm::ELF::EF_HEXAGON_MACH_V71) + .Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T) + .Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73); } llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() { @@ -606,12 +635,12 @@ return false; //assert(!HexagonMCInstrInfo::isBundle(Inst)); - if(!HexagonMCInstrInfo::isExtendable(*Info, Inst)) + if (!HexagonMCInstrInfo::isExtendable(*Info, Inst)) return false; auto const &Extended(HexagonMCInstrInfo::getExtendableOperand(*Info, Inst)); assert(Extended.isExpr()); int64_t Value; - if(!Extended.getExpr()->evaluateAsAbsolute(Value)) + if (!Extended.getExpr()->evaluateAsAbsolute(Value)) return false; Target = Value; return true; @@ -637,8 +666,8 @@ createHexagonMCRegisterInfo); // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(getTheHexagonTarget(), - Hexagon_MC::createHexagonMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo( + getTheHexagonTarget(), Hexagon_MC::createHexagonMCSubtargetInfo); // Register the MC Code Emitter TargetRegistry::RegisterMCCodeEmitter(getTheHexagonTarget(), @@ -648,18 +677,16 @@ TargetRegistry::RegisterMCAsmBackend(getTheHexagonTarget(), createHexagonAsmBackend); - // Register the MC instruction analyzer.
TargetRegistry::RegisterMCInstrAnalysis(getTheHexagonTarget(), createHexagonMCInstrAnalysis); // Register the obj streamer - TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), - createMCStreamer); + TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), createMCStreamer); // Register the obj target streamer - TargetRegistry::RegisterObjectTargetStreamer(getTheHexagonTarget(), - createHexagonObjectTargetStreamer); + TargetRegistry::RegisterObjectTargetStreamer( + getTheHexagonTarget(), createHexagonObjectTargetStreamer); // Register the asm streamer TargetRegistry::RegisterAsmTargetStreamer(getTheHexagonTarget(), Index: llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll +++ llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll @@ -2,73 +2,79 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=1024 %s | FileCheck -check-prefixes=OPT,MAX1024 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefixes=OPT,ALL %s -declare void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1 -declare void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1 -declare void @llvm.memcpy.p3i8.p1i8.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #1 -declare void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1 -declare void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1 -declare void @llvm.memmove.p1i8.p3i8.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p5i8.p5i8.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1 +declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 +declare void @llvm.memmove.p5i8.p5i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture readonly, i32, i1) #1 -declare void @llvm.memset.p1i8.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1 +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1 ; Test the upper bound for sizes to leave -define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; MAX1024-LABEL: 
@max_size_small_static_memcpy_caller0( -; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 1024, i1 false) +; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 addrspace(1)* [[SRC:%.*]], i64 1024, i1 false) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @max_size_small_static_memcpy_caller0( +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* ; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; ALL: load-store-loop: -; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1 -; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 -; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64 -; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; ALL-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 1 +; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; ALL-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 1 +; ALL-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; ALL-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64 +; ALL-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; ALL: memcpy-split: ; ALL-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } ; Smallest static size which will be expanded -define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @min_size_large_static_memcpy_caller0( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] 
], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 1 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 1 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1024 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 1 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1024 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 1 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1024 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1024 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 1 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } -define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; MAX1024-LABEL: @max_size_small_static_memmove_caller0( -; MAX1024-NEXT: call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 1024, i1 false) +; MAX1024-NEXT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 addrspace(1)* [[SRC:%.*]], i64 1024, i1 false) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @max_size_small_static_memmove_caller0( -; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]] +; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult i8 addrspace(1)* [[SRC:%.*]], [[DST:%.*]] ; ALL-NEXT: [[COMPARE_N_TO_0:%.*]] = icmp eq i64 1024, 0 ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[COPY_BACKWARDS:%.*]], label [[COPY_FORWARD:%.*]] ; ALL: copy_backwards: @@ -76,33 +82,33 @@ ; ALL: copy_backwards_loop: ; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ [[INDEX_PTR:%.*]], [[COPY_BACKWARDS_LOOP]] ], [ 1024, [[COPY_BACKWARDS]] ] ; ALL-NEXT: [[INDEX_PTR]] = sub i64 [[TMP1]], 1 -; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR]] -; ALL-NEXT: [[ELEMENT:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 1 -; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR]] -; ALL-NEXT: store i8 [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 +; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR]] +; ALL-NEXT: [[ELEMENT:%.*]] = load i8, i8 addrspace(1)* [[TMP2]], align 1 +; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR]] +; ALL-NEXT: store i8 
[[ELEMENT]], i8 addrspace(1)* [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_PTR]], 0 ; ALL-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE]], label [[COPY_BACKWARDS_LOOP]] ; ALL: copy_forward: ; ALL-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP:%.*]] ; ALL: copy_forward_loop: ; ALL-NEXT: [[INDEX_PTR1:%.*]] = phi i64 [ [[INDEX_INCREMENT:%.*]], [[COPY_FORWARD_LOOP]] ], [ 0, [[COPY_FORWARD]] ] -; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR1]] -; ALL-NEXT: [[ELEMENT2:%.*]] = load i8, ptr addrspace(1) [[TMP5]], align 1 -; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR1]] -; ALL-NEXT: store i8 [[ELEMENT2]], ptr addrspace(1) [[TMP6]], align 1 +; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR1]] +; ALL-NEXT: [[ELEMENT2:%.*]] = load i8, i8 addrspace(1)* [[TMP5]], align 1 +; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR1]] +; ALL-NEXT: store i8 [[ELEMENT2]], i8 addrspace(1)* [[TMP6]], align 1 ; ALL-NEXT: [[INDEX_INCREMENT]] = add i64 [[INDEX_PTR1]], 1 ; ALL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_INCREMENT]], 1024 ; ALL-NEXT: br i1 [[TMP7]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP]] ; ALL: memmove_done: ; ALL-NEXT: ret void ; - call void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } -define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @min_size_large_static_memmove_caller0( -; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]] +; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult i8 addrspace(1)* [[SRC:%.*]], [[DST:%.*]] ; OPT-NEXT: [[COMPARE_N_TO_0:%.*]] = icmp eq i64 1025, 0 ; OPT-NEXT: br i1 [[COMPARE_SRC_DST]], label [[COPY_BACKWARDS:%.*]], label [[COPY_FORWARD:%.*]] ; OPT: copy_backwards: @@ -110,1200 +116,1376 @@ ; OPT: copy_backwards_loop: ; OPT-NEXT: [[TMP1:%.*]] = phi i64 [ [[INDEX_PTR:%.*]], [[COPY_BACKWARDS_LOOP]] ], [ 1025, [[COPY_BACKWARDS]] ] ; OPT-NEXT: [[INDEX_PTR]] = sub i64 [[TMP1]], 1 -; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR]] -; OPT-NEXT: [[ELEMENT:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 1 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR]] -; OPT-NEXT: store i8 [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 +; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR]] +; OPT-NEXT: [[ELEMENT:%.*]] = load i8, i8 addrspace(1)* [[TMP2]], align 1 +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR]] +; OPT-NEXT: store i8 [[ELEMENT]], i8 addrspace(1)* [[TMP3]], align 1 ; OPT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_PTR]], 0 ; OPT-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE]], label [[COPY_BACKWARDS_LOOP]] ; OPT: copy_forward: ; OPT-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP:%.*]] ; OPT: copy_forward_loop: ; OPT-NEXT: [[INDEX_PTR1:%.*]] = phi i64 [ [[INDEX_INCREMENT:%.*]], [[COPY_FORWARD_LOOP]] ], [ 0, [[COPY_FORWARD]] ] -; 
OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR1]] -; OPT-NEXT: [[ELEMENT2:%.*]] = load i8, ptr addrspace(1) [[TMP5]], align 1 -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR1]] -; OPT-NEXT: store i8 [[ELEMENT2]], ptr addrspace(1) [[TMP6]], align 1 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR1]] +; OPT-NEXT: [[ELEMENT2:%.*]] = load i8, i8 addrspace(1)* [[TMP5]], align 1 +; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR1]] +; OPT-NEXT: store i8 [[ELEMENT2]], i8 addrspace(1)* [[TMP6]], align 1 ; OPT-NEXT: [[INDEX_INCREMENT]] = add i64 [[INDEX_PTR1]], 1 ; OPT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_INCREMENT]], 1025 ; OPT-NEXT: br i1 [[TMP7]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP]] ; OPT: memmove_done: ; OPT-NEXT: ret void ; - call void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } -define amdgpu_kernel void @max_size_small_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) #0 { +define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { ; MAX1024-LABEL: @max_size_small_static_memset_caller0( -; MAX1024-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 1024, i1 false) +; MAX1024-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 1024, i1 false) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @max_size_small_static_memset_caller0( ; ALL-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]] ; ALL: loadstoreloop: ; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ] -; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]] -; ALL-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1 +; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]] +; ALL-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]], align 1 ; ALL-NEXT: [[TMP3]] = add i64 [[TMP1]], 1 ; ALL-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1024 ; ALL-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]] ; ALL: split: ; ALL-NEXT: ret void ; - call void @llvm.memset.p1i8.i64(ptr addrspace(1) %dst, i8 %val, i64 1024, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) ret void } -define amdgpu_kernel void @min_size_large_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) #0 { +define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { ; OPT-LABEL: @min_size_large_static_memset_caller0( ; OPT-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]] ; OPT: loadstoreloop: ; OPT-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ] -; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]] -; OPT-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1 +; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]] +; OPT-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]], align 1 ; OPT-NEXT: [[TMP3]] = add i64 [[TMP1]], 1 ; OPT-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1025 ; OPT-NEXT: br i1 [[TMP4]], label 
[[LOADSTORELOOP]], label [[SPLIT]] ; OPT: split: ; OPT-NEXT: ret void ; - call void @llvm.memset.p1i8.i64(ptr addrspace(1) %dst, i8 %val, i64 1025, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false) ret void } -define amdgpu_kernel void @variable_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @variable_memcpy_caller0( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 -; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label 
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
ret void
}

-define amdgpu_kernel void @variable_memcpy_caller1(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
; OPT-LABEL: @variable_memcpy_caller1(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n, i64 %m) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
; OPT-LABEL: @memcpy_multi_use_one_function(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
-; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
-; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST0]], i64 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
-; OPT: post-loop-memcpy-expansion:
-; OPT-NEXT: [[TMP16:%.*]] = udiv i64 [[M:%.*]], 16
-; OPT-NEXT: [[TMP17:%.*]] = urem i64 [[M]], 16
-; OPT-NEXT: [[TMP18:%.*]] = sub i64 [[M]], [[TMP17]]
-; OPT-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP16]], 0
-; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_EXPANSION2:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST0:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION2:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5:%.*]]
; OPT: loop-memcpy-expansion2:
-; OPT-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP23:%.*]], [[LOOP_MEMCPY_EXPANSION2]] ]
-; OPT-NEXT: [[TMP20:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX3]]
-; OPT-NEXT: [[TMP21:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP20]], align 1
-; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX3]]
-; OPT-NEXT: store <4 x i32> [[TMP21]], ptr addrspace(1) [[TMP22]], align 1
-; OPT-NEXT: [[TMP23]] = add i64 [[LOOP_INDEX3]], 1
-; OPT-NEXT: [[TMP24:%.*]] = icmp ult i64 [[TMP23]], [[TMP16]]
-; OPT-NEXT: br i1 [[TMP24]], label [[LOOP_MEMCPY_EXPANSION2]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5]]
+; OPT-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION2]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX3]]
+; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX3]]
+; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX3]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION2]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5]]
; OPT: loop-memcpy-residual4:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX6:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER5]] ], [ [[TMP29:%.*]], [[LOOP_MEMCPY_RESIDUAL4:%.*]] ]
-; OPT-NEXT: [[TMP25:%.*]] = add i64 [[TMP18]], [[RESIDUAL_LOOP_INDEX6]]
-; OPT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP25]]
-; OPT-NEXT: [[TMP27:%.*]] = load i8, ptr addrspace(1) [[TMP26]], align 1
-; OPT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 [[TMP25]]
-; OPT-NEXT: store i8 [[TMP27]], ptr addrspace(1) [[TMP28]], align 1
-; OPT-NEXT: [[TMP29]] = add i64 [[RESIDUAL_LOOP_INDEX6]], 1
-; OPT-NEXT: [[TMP30:%.*]] = icmp ult i64 [[TMP29]], [[TMP17]]
-; OPT-NEXT: br i1 [[TMP30]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX6:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER5]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL4:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX6]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX6]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1:%.*]]
; OPT: post-loop-memcpy-expansion1:
+; OPT-NEXT: [[TMP20:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP21:%.*]] = bitcast i8 addrspace(1)* [[DST1:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP22:%.*]] = udiv i64 [[M:%.*]], 16
+; OPT-NEXT: [[TMP23:%.*]] = urem i64 [[M]], 16
+; OPT-NEXT: [[TMP24:%.*]] = sub i64 [[M]], [[TMP23]]
+; OPT-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP22]], 0
+; OPT-NEXT: br i1 [[TMP25]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT: loop-memcpy-expansion:
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION1]] ], [ [[TMP29:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP26:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP20]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP27:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP26]], align 1
+; OPT-NEXT: [[TMP28:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP21]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP27]], <4 x i32> addrspace(1)* [[TMP28]], align 1
+; OPT-NEXT: [[TMP29]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP30:%.*]] = icmp ult i64 [[TMP29]], [[TMP22]]
+; OPT-NEXT: br i1 [[TMP30]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT: loop-memcpy-residual:
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP37:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP20]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP21]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP33:%.*]] = add i64 [[TMP24]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP31]], i64 [[TMP33]]
+; OPT-NEXT: [[TMP35:%.*]] = load i8, i8 addrspace(1)* [[TMP34]], align 1
+; OPT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP32]], i64 [[TMP33]]
+; OPT-NEXT: store i8 [[TMP35]], i8 addrspace(1)* [[TMP36]], align 1
+; OPT-NEXT: [[TMP37]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP38:%.*]] = icmp ult i64 [[TMP37]], [[TMP23]]
+; OPT-NEXT: br i1 [[TMP38]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP31]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP39:%.*]] = icmp ne i64 [[TMP23]], 0
+; OPT-NEXT: br i1 [[TMP39]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
; OPT: loop-memcpy-residual-header5:
-; OPT-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP17]], 0
-; OPT-NEXT: br i1 [[TMP32]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1]]
+; OPT-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP40]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %m, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_alt_type(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_alt_type(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <2 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(1) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(1)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false)
ret void
}

; One of the uses in the function should be expanded, the other left alone.
-define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
; MAX1024-LABEL: @memcpy_multi_use_one_function_keep_small(
-; MAX1024-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; MAX1024-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; MAX1024-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; MAX1024-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; MAX1024-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; MAX1024-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; MAX1024-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST0:%.*]] to <4 x i32> addrspace(1)*
+; MAX1024-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; MAX1024-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; MAX1024-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; MAX1024-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; MAX1024-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; MAX1024: loop-memcpy-expansion:
-; MAX1024-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; MAX1024-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; MAX1024-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; MAX1024-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
-; MAX1024-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; MAX1024-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; MAX1024-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; MAX1024-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; MAX1024-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; MAX1024-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; MAX1024-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; MAX1024-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; MAX1024-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; MAX1024-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1
+; MAX1024-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; MAX1024-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; MAX1024: loop-memcpy-residual:
-; MAX1024-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; MAX1024-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; MAX1024-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; MAX1024-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; MAX1024-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST0]], i64 [[TMP10]]
-; MAX1024-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; MAX1024-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; MAX1024-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; MAX1024-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; MAX1024-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; MAX1024-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; MAX1024-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; MAX1024-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; MAX1024-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; MAX1024-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; MAX1024-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; MAX1024-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; MAX1024-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; MAX1024-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; MAX1024-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; MAX1024: post-loop-memcpy-expansion:
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST1:%.*]], ptr addrspace(1) [[SRC]], i64 102, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* [[DST1:%.*]], i8 addrspace(1)* [[SRC]], i64 102, i1 false)
; MAX1024-NEXT: ret void
; MAX1024: loop-memcpy-residual-header:
-; MAX1024-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
-; MAX1024-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; MAX1024-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0
+; MAX1024-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
; ALL-LABEL: @memcpy_multi_use_one_function_keep_small(
-; ALL-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; ALL-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; ALL-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; ALL-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; ALL-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST0:%.*]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; ALL-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; ALL-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; ALL-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; ALL: loop-memcpy-expansion:
-; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; ALL-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; ALL-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; ALL-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; ALL-NEXT: [[LOOP_INDEX1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX1]]
+; ALL-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; ALL-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX1]]
+; ALL-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; ALL-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX1]], 1
+; ALL-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; ALL-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; ALL: loop-memcpy-residual:
-; ALL-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; ALL-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; ALL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; ALL-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; ALL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST0]], i64 [[TMP10]]
-; ALL-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; ALL-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; ALL-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; ALL-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; ALL-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; ALL-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; ALL-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; ALL-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; ALL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; ALL-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; ALL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; ALL-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; ALL-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; ALL-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; ALL-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; ALL: post-loop-memcpy-expansion:
+; ALL-NEXT: [[TMP20:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP21:%.*]] = bitcast i8 addrspace(1)* [[DST1:%.*]] to <4 x i32> addrspace(1)*
; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; ALL: load-store-loop:
-; ALL-NEXT: [[LOOP_INDEX1:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP19:%.*]], [[LOAD_STORE_LOOP]] ]
-; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX1]]
-; ALL-NEXT: [[TMP17:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP16]], align 1
-; ALL-NEXT: [[TMP18:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX1]]
-; ALL-NEXT: store <4 x i32> [[TMP17]], ptr addrspace(1) [[TMP18]], align 1
-; ALL-NEXT: [[TMP19]] = add i64 [[LOOP_INDEX1]], 1
-; ALL-NEXT: [[TMP20:%.*]] = icmp ult i64 [[TMP19]], 6
-; ALL-NEXT: br i1 [[TMP20]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP25:%.*]], [[LOAD_STORE_LOOP]] ]
+; ALL-NEXT: [[TMP22:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP20]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: [[TMP23:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP22]], align 1
+; ALL-NEXT: [[TMP24:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP21]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: store <4 x i32> [[TMP23]], <4 x i32> addrspace(1)* [[TMP24]], align 1
+; ALL-NEXT: [[TMP25]] = add i64 [[LOOP_INDEX]], 1
+; ALL-NEXT: [[TMP26:%.*]] = icmp ult i64 [[TMP25]], 6
+; ALL-NEXT: br i1 [[TMP26]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; ALL: memcpy-split:
-; ALL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 24
-; ALL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) [[TMP21]], align 1
-; ALL-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST1]], i64 24
-; ALL-NEXT: store i32 [[TMP22]], ptr addrspace(1) [[TMP23]], align 1
-; ALL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 50
-; ALL-NEXT: [[TMP25:%.*]] = load i16, ptr addrspace(1) [[TMP24]], align 1
-; ALL-NEXT: [[TMP26:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST1]], i64 50
-; ALL-NEXT: store i16 [[TMP25]], ptr addrspace(1) [[TMP26]], align 1
+; ALL-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP20]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP27]], i64 24
+; ALL-NEXT: [[TMP29:%.*]] = load i32, i32 addrspace(1)* [[TMP28]], align 1
+; ALL-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP21]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP30]], i64 24
+; ALL-NEXT: store i32 [[TMP29]], i32 addrspace(1)* [[TMP31]], align 1
+; ALL-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP20]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP33:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP32]], i64 50
+; ALL-NEXT: [[TMP34:%.*]] = load i16, i16 addrspace(1)* [[TMP33]], align 1
+; ALL-NEXT: [[TMP35:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP21]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP36:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP35]], i64 50
+; ALL-NEXT: store i16 [[TMP34]], i16 addrspace(1)* [[TMP36]], align 1
; ALL-NEXT: ret void
; ALL: loop-memcpy-residual-header:
-; ALL-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP2]], 0
-; ALL-NEXT: br i1 [[TMP27]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; ALL-NEXT: [[TMP37:%.*]] = icmp ne i64 [[TMP4]], 0
+; ALL-NEXT: br i1 [[TMP37]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 102, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1028(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 256
-; OPT-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 256
-; OPT-NEXT: store i32 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP8]], i64 256
+; OPT-NEXT: [[TMP10:%.*]] = load i32, i32 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP11]], i64 256
+; OPT-NEXT: store i32 [[TMP10]], i32 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1028, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1028, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1025(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1024
-; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1024
-; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1024
+; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1024
+; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1025, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1025, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1026(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 512
-; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 512
-; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP8]], i64 512
+; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP11]], i64 512
+; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1026, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1026, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1032(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1032, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1032, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1034(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 516
-; OPT-NEXT: [[TMP10:%.*]] = load i16, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 516
-; OPT-NEXT: store i16 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP13]], i64 516
+; OPT-NEXT: [[TMP15:%.*]] = load i16, i16 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP16]], i64 516
+; OPT-NEXT: store i16 [[TMP15]], i16 addrspace(1)* [[TMP17]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1034, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1034, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1035(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 516
-; OPT-NEXT: [[TMP10:%.*]] = load i16, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 516
-; OPT-NEXT: store i16 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
-; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1034
-; OPT-NEXT: [[TMP13:%.*]] = load i8, ptr addrspace(1) [[TMP12]], align 2
-; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1034
-; OPT-NEXT: store i8 [[TMP13]], ptr addrspace(1) [[TMP14]], align 2
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP13]], i64 516
+; OPT-NEXT: [[TMP15:%.*]] = load i16, i16 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP16]], i64 516
+; OPT-NEXT: store i16 [[TMP15]], i16 addrspace(1)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP18]], i64 1034
+; OPT-NEXT: [[TMP20:%.*]] = load i8, i8 addrspace(1)* [[TMP19]], align 2
+; OPT-NEXT: [[TMP21:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP21]], i64 1034
+; OPT-NEXT: store i8 [[TMP20]], i8 addrspace(1)* [[TMP22]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1035, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1035, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1036(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 258
-; OPT-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 258
-; OPT-NEXT: store i32 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP13]], i64 258
+; OPT-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP16]], i64 258
+; OPT-NEXT: store i32 [[TMP15]], i32 addrspace(1)* [[TMP17]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1036, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1036, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1039(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 258
-; OPT-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 258
-; OPT-NEXT: store i32 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
-; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 518
-; OPT-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(1) [[TMP12]], align 4
-; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 518
-; OPT-NEXT: store i16 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
-; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1038
-; OPT-NEXT: [[TMP16:%.*]] = load i8, ptr addrspace(1) [[TMP15]], align 2
-; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1038
-; OPT-NEXT: store i8 [[TMP16]], ptr addrspace(1) [[TMP17]], align 2
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP13]], i64 258
+; OPT-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP16]], i64 258
+; OPT-NEXT: store i32 [[TMP15]], i32 addrspace(1)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP18]], i64 518
+; OPT-NEXT: [[TMP20:%.*]] = load i16, i16 addrspace(1)* [[TMP19]], align 4
+; OPT-NEXT: [[TMP21:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP21]], i64 518
+; OPT-NEXT: store i16 [[TMP20]], i16 addrspace(1)* [[TMP22]], align 4
+; OPT-NEXT: [[TMP23:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP23]], i64 1038
+; OPT-NEXT: [[TMP25:%.*]] = load i8, i8 addrspace(1)* [[TMP24]], align 2
+; OPT-NEXT: [[TMP26:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP26]], i64 1038
+; OPT-NEXT: store i8 [[TMP25]], i8 addrspace(1)* [[TMP27]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1039, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1039, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align2_global_align2_1039(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 2
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 2
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 519
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(1)* [[TMP3]], align 2
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(1)* [[TMP5]], align 2
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 519
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1038
-; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 2
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1038
-; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2
+; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1038
+; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 2
+; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)*
[[TMP11]], i64 1038 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 1039, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %dst, i8 addrspace(1)* align 2 %src, i64 1039, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @memcpy_global_align4_global_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(1) [[TMP6]], align 4 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(1) [[TMP9]], align 2 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(1) [[TMP11]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP8]], i64 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(1)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP11]], i64 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(1)* [[TMP12]], align 4 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] 
= getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(1)* [[TMP14]], align 2 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP16]], i64 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(1)* [[TMP17]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @memcpy_global_align2_global_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(1)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(1)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %dst, i8 addrspace(1)* align 4 
%src, i64 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @memcpy_global_align4_global_align2_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(1)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(1)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 2 %src, i64 1027, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 2 %src, i64 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align4_private_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to <4 x i32> addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to <4 x i32> addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 
0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(5)* [[TMP3]], align 4 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(5)* [[TMP5]], align 4 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC]], i32 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(5) [[TMP6]], align 4 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST]], i32 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(5) [[TMP8]], align 4 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(5) [[TMP9]], align 2 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP8]], i32 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(5)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP11]], i32 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(5)* [[TMP12]], align 4 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP13]], i32 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(5)* [[TMP14]], align 2 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP16]], i32 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(5)* [[TMP17]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 4 %dst, i8 addrspace(5)* align 4 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: 
@memcpy_private_align2_private_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to i16 addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(5) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(5)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(5)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(5) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP8]], i32 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(5)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP11]], i32 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(5)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 2 %dst, i8 addrspace(5)* align 4 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align1_private_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to <4 x i32> addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to <4 x i32> addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4 -; OPT-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds <4 x i32>, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(5)* [[TMP3]], align 4 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(5)* [[TMP5]], align 1 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC]], i32 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(5) [[TMP6]], align 4 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST]], i32 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(5) [[TMP8]], align 1 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(5) [[TMP9]], align 2 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 1 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP8]], i32 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(5)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP11]], i32 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(5)* [[TMP12]], align 1 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP13]], i32 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(5)* [[TMP14]], align 2 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP16]], i32 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(5)* [[TMP17]], align 1 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 1 %dst, i8 addrspace(5)* align 4 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align4_private_align2_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to i16 addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: 
[[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(5) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(5)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(5)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(5) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP8]], i32 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(5)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP11]], i32 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(5)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 4 %dst, i8 addrspace(5)* align 2 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align4_private_align1_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to <4 x i32> addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to <4 x i32> addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 1 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label 
[[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(5)* [[TMP3]], align 1 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(5)* [[TMP5]], align 4 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC]], i32 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(5) [[TMP6]], align 1 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST]], i32 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(5) [[TMP8]], align 4 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(5) [[TMP9]], align 1 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP8]], i32 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(5)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP11]], i32 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(5)* [[TMP12]], align 4 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP13]], i32 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(5)* [[TMP14]], align 1 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP16]], i32 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(5)* [[TMP17]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 1 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 4 %dst, i8 addrspace(5)* align 1 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align2_private_align2_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to i16 addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds i16, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(5) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(5)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(5)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(5) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP8]], i32 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(5)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP11]], i32 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(5)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 2 %dst, i8 addrspace(5)* align 2 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @memcpy_global_align4_global_align4_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 4 -; OPT-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 4 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 4 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 4 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 4 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] +; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] +; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]] +; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]] +; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 4 +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]] +; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 4 +; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]] +; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] ; OPT: post-loop-memcpy-expansion: ; OPT-NEXT: ret void ; OPT: loop-memcpy-residual-header: -; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0 -; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] +; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0 +; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 %n, i1 false) ret void } -define amdgpu_kernel void 
@memcpy_global_align2_global_align2_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @memcpy_global_align2_global_align2_variable(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @memcpy_global_align2_global_align2_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 2 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 2 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 2 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 2 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(1) [[TMP5]], align 2 -; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP6]], ptr addrspace(1) [[TMP7]], align 2 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load i16, i16 addrspace(1)* [[TMP7]], align 2 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP8]], i16 addrspace(1)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 2 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 2 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] +; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] +; OPT-NEXT: [[TMP12:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP13:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 
addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]] +; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]] +; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 2 +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]] +; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 2 +; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]] +; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] ; OPT: post-loop-memcpy-expansion: ; OPT-NEXT: ret void ; OPT: loop-memcpy-residual-header: -; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0 -; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] +; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0 +; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %dst, i8 addrspace(1)* align 2 %src, i64 %n, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @memcpy_global_align1_global_align1_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 -; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 
[[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] +; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] +; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]] +; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]] +; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1 +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]] +; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1 +; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]] +; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] ; OPT: post-loop-memcpy-expansion: ; OPT-NEXT: ret void ; OPT: loop-memcpy-residual-header: -; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0 -; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] +; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0 +; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 1 %dst, i8 addrspace(1)* align 1 %src, i64 %n, i1 false) ret void } -define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 { +define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %n) #0 { ; OPT-LABEL: @memcpy_local_align4_local_align4_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8 -; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8 -; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to <2 x i32> addrspace(3)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i32 
[[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 4
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(3) [[TMP7]], align 4
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 4
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(3)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 4
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 4
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 4
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %dst, i8 addrspace(3)* align 4 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_local_align2_local_align2_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 2
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 2
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to i16 addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to i16 addrspace(3)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 2
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 2
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(3) [[TMP5]], align 2
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store i16 [[TMP6]], ptr addrspace(3) [[TMP7]], align 2
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16 addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load i16, i16 addrspace(3)* [[TMP7]], align 2
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store i16 [[TMP8]], i16 addrspace(3)* [[TMP9]], align 2
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 2
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 2
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast i16 addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast i16 addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 2
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 2
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %dst, i8 addrspace(3)* align 2 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_local_align1_local_align1_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(3) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(3)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 1 %dst, i8 addrspace(3)* align 1 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_local_align4_global_align4_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <2 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(1) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP5]], align 4
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(3) [[TMP7]], align 4
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(1)* [[TMP7]], align 4
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(3)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 4
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 4
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 4
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p1i8.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(1) align 4 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %dst, i8 addrspace(1)* align 4 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_global_align4_local_align4_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <2 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 4
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(1) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 4
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 4
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 4
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 4
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 4
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %dst, i8 addrspace(3)* align 4 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_16(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_16(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 16, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 16, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_16(
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; ALL: load-store-loop:
-; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1
-; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; ALL-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; ALL-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 1
+; ALL-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; ALL: memcpy-split:
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 16, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 16, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_12(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_12(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_12(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 12, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 12, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_12(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 2
-; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(1) [[TMP4]], align 4
-; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 2
-; ALL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i64 [[TMP3]], i64 addrspace(1)* [[TMP5]], align 4
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i64 2
+; ALL-NEXT: [[TMP8:%.*]] = load i32, i32 addrspace(1)* [[TMP7]], align 4
+; ALL-NEXT: [[TMP9:%.*]] = bitcast i8 addrspace(1)* [[DST]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP9]], i64 2
+; ALL-NEXT: store i32 [[TMP8]], i32 addrspace(1)* [[TMP10]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 12, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 12, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_8(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_8(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 8, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 8, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_8(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i64 [[TMP3]], i64 addrspace(1)* [[TMP5]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 8, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 8, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_10(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_10(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_10(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 10, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 10, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_10(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 4
-; ALL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[TMP4]], align 4
-; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 4
-; ALL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i64 [[TMP3]], i64 addrspace(1)* [[TMP5]], align 4
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP6]], i64 4
+; ALL-NEXT: [[TMP8:%.*]] = load i16, i16 addrspace(1)* [[TMP7]], align 4
+; ALL-NEXT: [[TMP9:%.*]] = bitcast i8 addrspace(1)* [[DST]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP9]], i64 4
+; ALL-NEXT: store i16 [[TMP8]], i16 addrspace(1)* [[TMP10]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 10, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 10, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_4(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_4(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_4(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 4, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 4, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_4(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i32 [[TMP3]], i32 addrspace(1)* [[TMP5]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 4, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 4, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_2(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_2(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_2(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 2, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 2, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_2(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i16 [[TMP3]], i16 addrspace(1)* [[TMP5]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 2, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 2, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_1(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 1, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 1, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_1(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i8 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC:%.*]], i64 0
+; ALL-NEXT: [[TMP2:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 4
+; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 0
+; ALL-NEXT: store i8 [[TMP2]], i8 addrspace(1)* [[TMP3]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1, i1 false)
  ret void
}
Index: mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
===================================================================
--- mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
+++ mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
@@ -276,6 +276,9 @@
   SmallVector<int64_t> newOutputShape;
   ArrayRef<int64_t> oldShape =
       linalgOp.getShape(linalgOp.getDpsInitOperand(0));
+  assert(sizes.size() == oldShape.size() + 1 &&
+         "result tensor should have rank exactly one dimension smaller than "
+         "the number of loops.");
   SmallVector<Value> dynamicDims;
   for (int64_t idx : llvm::seq<int64_t>(0, oldShape.size() + 1)) {
     if (idx == insertSplitDimension) {
Index: mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
===================================================================
--- mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -453,6 +453,19 @@
      break;
    }
  }
+  {
+    auto origResultTensor = cast<DestinationStyleOpInterface>(op.getOperation())
+                                .getDpsInitOperand(0);
+    size_t origResultSize = 0;
+    if (auto shapedType =
+            origResultTensor->get().getType().dyn_cast<ShapedType>())
+      origResultSize = shapedType.getShape().size();
+    if (iterationDomain.size() != origResultSize + 1) {
+      return b.notifyMatchFailure(
+          op, "only support result tensor whose rank is exactly one dimension "
+              "smaller than the number of loops.");
+    }
+  }
  // 1. create the inital tensor value.
  FailureOr<Operation *> identityTensor =
      op.generateInitialTensorForPartialReduction(b, loc, tileSize,
Index: mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
===================================================================
--- mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
+++ mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
@@ -273,3 +273,4 @@
  %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
  %1:4 = transform.structured.split_reduction %0 { split_factor = 4, insert_split_dimension = 2, inner_parallel}
}
+
Index: mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
===================================================================
--- mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
+++ mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
@@ -197,3 +197,36 @@
// CHECK: linalg.yield
// CHECK: } -> tensor
// CHECK: return %[[R]] : tensor
+
+// -----
+
+func.func @reduction_bug(%arg0: tensor<32x32xi32>, %arg1: tensor<32x32xi32>, %out: tensor<32xi32>) -> tensor<32xi32> {
+  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+                                          affine_map<(d0, d1, d2) -> (d0, d1)>,
+                                          affine_map<(d0, d1, d2) -> (d0)>],
+  iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<32x32xi32>, tensor<32x32xi32>) outs(%out : tensor<32xi32>) {
+  ^bb0(%a: i32, %b: i32, %c: i32):
+    %r1 = arith.muli %a, %b : i32
+    %r2 = arith.addi %c, %r1 : i32
+    linalg.yield %r2 : i32
+  } -> tensor<32xi32>
+  return %red : tensor<32xi32>
+}
+
+transform.sequence failures(suppress) {
+^bb0(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
+  %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [0, 0, 8] }
+}
+
+// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0)>
+// CHECK-LABEL: func @reduction_bug
+// CHECK: %[[RED:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "reduction"]}
+// CHECK-SAME: ins(%[[I1:.*]], %[[I2:.*]] : tensor<32x32xi32>, tensor<32x32xi32>) outs(%[[F:.*]] : tensor<32xi32>) {
+// CHECK: arith.muli
+// CHECK: arith.addi
+// CHECK: linalg.yield
+// CHECK: } -> tensor<32xi32>
+// CHECK: return %[[RED]] : tensor<32xi32>