Index: clang/include/clang/Basic/BuiltinsHexagonDep.def
===================================================================
--- clang/include/clang/Basic/BuiltinsHexagonDep.def
+++ clang/include/clang/Basic/BuiltinsHexagonDep.def
@@ -1890,3 +1890,36 @@
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhsat_128B, "V32iV64iV32i", "", HVXV69)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs, "V16iV16iV16i", "", HVXV69)
 TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs_128B, "V32iV32iV32i", "", HVXV69)
+
+// V73 HVX Instructions.
+
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_bf, "V32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_bf_128B, "V64iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_h_hf, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_h_hf_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_h, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_h_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_w, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_w_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_w_sf, "V16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_w_sf_128B, "V32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_bf_sf, "V16iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_bf_sf_128B, "V32iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf, "V64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_128B, "V128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_and, "V64bV64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_and_128B, "V128bV128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_or, "V64bV64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_or_128B, "V128bV128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_xor, "V64bV64bV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtbf_xor_128B, "V128bV128bV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_bf, "V16iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_bf_128B, "V32iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_bf, "V16iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_bf_128B, "V32iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf, "V32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf_128B, "V64iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf_acc, "V32iV32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_bf_acc_128B, "V64iV64iV32iV32i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_bf, "V32iV16iV16i", "", HVXV73)
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_bf_128B, "V64iV32iV32i", "", HVXV73)
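For context, a minimal C sketch of how one of the new V73 builtins might be invoked. This is not part of the patch: it assumes a compiler carrying this change plus driver flags along the lines of -mv73 -mhvx -mhvx-length=64B, and the typedefs and function name are illustrative. In the prototype strings above, "V16i" denotes one 64-byte HVX vector (16 x i32) and "V32i" a vector pair:

  /* Hypothetical usage sketch, not from the patch. */
  typedef int HVX_Vector __attribute__((__vector_size__(64)));      /* "V16i" */
  typedef int HVX_VectorPair __attribute__((__vector_size__(128))); /* "V32i" */

  /* V6_vadd_sf_bf: add two bf16 vectors, widening into a float vector pair. */
  HVX_VectorPair add_bf16(HVX_Vector a, HVX_Vector b) {
    return __builtin_HEXAGON_V6_vadd_sf_bf(a, b);
  }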
Index: lldb/include/lldb/Core/Module.h
===================================================================
--- lldb/include/lldb/Core/Module.h
+++ lldb/include/lldb/Core/Module.h
@@ -815,8 +815,6 @@
   llvm::Expected<lldb::TypeSystemSP>
   GetTypeSystemForLanguage(lldb::LanguageType language);
 
-  /// Call \p callback for each \p TypeSystem in this \p Module.
-  /// Return true from callback to keep iterating, false to stop iterating.
   void ForEachTypeSystem(llvm::function_ref<bool(lldb::TypeSystemSP)> callback);
 
   // Special error functions that can do printf style formatting that will
Index: llvm/include/llvm/IR/IntrinsicsHexagonDep.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsHexagonDep.td
+++ llvm/include/llvm/IR/IntrinsicsHexagonDep.td
@@ -316,7 +316,7 @@
        [llvm_v32i32_ty], [llvm_v64i32_ty],
        intr_properties>;
 
-// tag : V6_lvsplatw
+// tag : V6_lvsplatb
 class Hexagon_v16i32_i32_Intrinsic<string GCCIntSuffix,
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_Intrinsic<GCCIntSuffix,
       [llvm_v16i32_ty], [llvm_i32_ty],
       intr_properties>;
 
-// tag : V6_vadd_sf_hf
+// tag : V6_vadd_sf_bf
 class Hexagon_v32i32_v16i32v16i32_Intrinsic<string GCCIntSuffix,
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_Intrinsic<GCCIntSuffix,
       [llvm_v32i32_ty], [llvm_v16i32_ty,llvm_v16i32_ty],
       intr_properties>;
 
-// tag : V6_vadd_sf_hf
+// tag : V6_vadd_sf_bf
 class Hexagon_v64i32_v32i32v32i32_Intrinsic<string GCCIntSuffix,
       list<IntrinsicProperty> intr_properties = [IntrNoMem]>
   : Hexagon_Intrinsic<GCCIntSuffix,
       [llvm_v64i32_ty], [llvm_v32i32_ty,llvm_v32i32_ty],
       intr_properties>;
 
+// V73 HVX Instructions.
+
+def int_hexagon_V6_vadd_sf_bf :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf_bf">;
+
+def int_hexagon_V6_vadd_sf_bf_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_bf_128B">;
+
+def int_hexagon_V6_vconv_h_hf :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_h_hf">;
+
+def int_hexagon_V6_vconv_h_hf_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_h_hf_128B">;
+
+def int_hexagon_V6_vconv_hf_h :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_hf_h">;
+
+def int_hexagon_V6_vconv_hf_h_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_hf_h_128B">;
+
+def int_hexagon_V6_vconv_sf_w :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_sf_w">;
+
+def int_hexagon_V6_vconv_sf_w_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_sf_w_128B">;
+
+def int_hexagon_V6_vconv_w_sf :
+Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_w_sf">;
+
+def int_hexagon_V6_vconv_w_sf_128B :
+Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_w_sf_128B">;
+
+def int_hexagon_V6_vcvt_bf_sf :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_bf_sf">;
+
+def int_hexagon_V6_vcvt_bf_sf_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_bf_sf_128B">;
+
+def int_hexagon_V6_vgtbf :
+Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf">;
+
+def int_hexagon_V6_vgtbf_128B :
+Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_128B">;
+
+def int_hexagon_V6_vgtbf_and :
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf_and">;
+
+def int_hexagon_V6_vgtbf_and_128B :
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_and_128B">;
+
+def int_hexagon_V6_vgtbf_or :
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf_or">;
+
+def int_hexagon_V6_vgtbf_or_128B :
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_or_128B">;
+
+def int_hexagon_V6_vgtbf_xor :
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtbf_xor">;
+
+def int_hexagon_V6_vgtbf_xor_128B :
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtbf_xor_128B">;
+
+def int_hexagon_V6_vmax_bf :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmax_bf">;
+
+def int_hexagon_V6_vmax_bf_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmax_bf_128B">;
+
+def int_hexagon_V6_vmin_bf :
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmin_bf">;
+
+def int_hexagon_V6_vmin_bf_128B :
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmin_bf_128B">;
+
+def int_hexagon_V6_vmpy_sf_bf :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf">;
+
+def int_hexagon_V6_vmpy_sf_bf_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf_128B">;
+
+def int_hexagon_V6_vmpy_sf_bf_acc :
+Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf_acc">;
+
+def int_hexagon_V6_vmpy_sf_bf_acc_128B :
+Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_bf_acc_128B">;
+
+def int_hexagon_V6_vsub_sf_bf :
+Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf_bf">;
+
+def int_hexagon_V6_vsub_sf_bf_128B :
+Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_bf_128B">;
+
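Each intrinsic above mirrors one of the new builtins and encodes its LLVM type signature in the class name; for example, Hexagon_v32i32_v16i32v16i32_Intrinsic returns a v32i32 pair and takes two v16i32 vectors. A hedged C sketch of the _128B forms, assuming -mhvx-length=128B, where a single vector is 128 bytes, so "V32i" is one vector and "V64i" a pair (typedefs again illustrative):

  /* Hypothetical usage sketch of an accumulating _128B form. */
  typedef int HVX128_Vector __attribute__((__vector_size__(128)));
  typedef int HVX128_VectorPair __attribute__((__vector_size__(256)));

  /* V6_vmpy_sf_bf_acc: acc (float pair) += a (bf16) * b (bf16). */
  HVX128_VectorPair mac_bf16(HVX128_VectorPair acc,
                             HVX128_Vector a, HVX128_Vector b) {
    return __builtin_HEXAGON_V6_vmpy_sf_bf_acc_128B(acc, a, b);
  }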
Index: llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
===================================================================
--- llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -354,6 +354,11 @@
       return false;
     return Value == -1;
   }
+  bool issgp10Const() const {
+    if (!isReg())
+      return false;
+    return getReg() == Hexagon::SGP1_0;
+  }
   bool iss11_0Imm() const {
     return CheckImmRange(11 + 26, 0, true, true, true);
   }
@@ -400,6 +405,9 @@
   void addn1ConstOperands(MCInst &Inst, unsigned N) const {
     addImmOperands(Inst, N);
   }
+  void addsgp10ConstOperands(MCInst &Inst, unsigned N) const {
+    addRegOperands(Inst, N);
+  }
 
   StringRef getToken() const {
     assert(Kind == Token && "Invalid access!");
Index: llvm/lib/Target/Hexagon/Hexagon.td
===================================================================
--- llvm/lib/Target/Hexagon/Hexagon.td
+++ llvm/lib/Target/Hexagon/Hexagon.td
@@ -58,6 +58,14 @@
     "Hexagon::ArchEnum::V69", "Hexagon HVX instructions", [ExtensionHVXV60,
     ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67,
     ExtensionHVXV68]>;
+def ExtensionHVXV71: SubtargetFeature<"hvxv71", "HexagonHVXVersion",
+    "Hexagon::ArchEnum::V71", "Hexagon HVX instructions",
+    [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
+    ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69]>;
+def ExtensionHVXV73: SubtargetFeature<"hvxv73", "HexagonHVXVersion",
+    "Hexagon::ArchEnum::V73", "Hexagon HVX instructions",
+    [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66,
+    ExtensionHVXV67, ExtensionHVXV68, ExtensionHVXV69, ExtensionHVXV71]>;
 def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps",
     "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>;
 
@@ -125,6 +133,10 @@
       AssemblerPredicate<(all_of ExtensionHVXV68)>;
 def UseHVXV69 : Predicate<"HST->useHVXV69Ops()">,
       AssemblerPredicate<(all_of ExtensionHVXV69)>;
+def UseHVXV71 : Predicate<"HST->useHVXV71Ops()">,
+      AssemblerPredicate<(all_of ExtensionHVXV71)>;
+def UseHVXV73 : Predicate<"HST->useHVXV73Ops()">,
+      AssemblerPredicate<(all_of ExtensionHVXV73)>;
 def UseAudio : Predicate<"HST->useAudioOps()">,
       AssemblerPredicate<(all_of ExtensionAudio)>;
 def UseZReg : Predicate<"HST->useZRegOps()">,
@@ -439,6 +451,17 @@
            FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
            FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
            FeatureCabac]>;
+def : Proc<"hexagonv71", HexagonModelV71,
+           [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+            ArchV68, ArchV69, ArchV71,
+            FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+            FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData,
+            FeatureCabac]>;
+def : Proc<"hexagonv73", HexagonModelV73,
+           [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+            ArchV68, ArchV69, ArchV71, ArchV73,
+            FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops,
+            FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>;
 
 // Need to update the correct features for tiny core.
 // Disable NewValueJumps since the packetizer is unable to handle a packet with
 // a new value jump and another SLOT0 instruction.
@@ -448,6 +471,13 @@
            FeatureCompound, FeatureMemNoShuf, FeatureMemops,
            FeatureNVS, FeaturePackets, FeatureSmallData]>;
+def : Proc<"hexagonv71t", HexagonModelV71T,
+           [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67,
+            ArchV68, ArchV69, ArchV71,
+            ProcTinyCore, ExtensionAudio,
+            FeatureCompound, FeatureMemNoShuf, FeatureMemops,
+            FeatureNVS, FeaturePackets, FeatureSmallData]>;
+
 //===----------------------------------------------------------------------===//
 // Declare the target which we are implementing
 //===----------------------------------------------------------------------===//
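The new CPUs are selected the same way as the existing ones (for example -mcpu=hexagonv73 for llc), and HVXV73 pulls in every earlier HVX feature set through the dependency lists above. A small, hedged C sketch of guarding bf16 code on the usual Hexagon predefines; the __HVX_ARCH__ spelling follows the existing convention and is an assumption here, not something this hunk adds:

  /* Hypothetical feature guard; macro names assumed, not from the patch. */
  #if defined(__HVX__) && defined(__HVX_ARCH__) && (__HVX_ARCH__ >= 73)
  #  define HAVE_HVX_BF16 1  /* vadd_sf_bf, vmpy_sf_bf, vgtbf, ... selectable */
  #else
  #  define HAVE_HVX_BF16 0
  #endif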
Index: llvm/lib/Target/Hexagon/HexagonDepArch.h
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepArch.h
+++ llvm/lib/Target/Hexagon/HexagonDepArch.h
@@ -5,9 +5,6 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-// Automatically generated file, do not edit!
-//===----------------------------------------------------------------------===//
-
 #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
 #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H
 
@@ -16,7 +13,21 @@
 namespace llvm {
 namespace Hexagon {
-enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 };
+enum class ArchEnum {
+  NoArch,
+  Generic,
+  V5,
+  V55,
+  V60,
+  V62,
+  V65,
+  V66,
+  V67,
+  V68,
+  V69,
+  V71,
+  V73
+};
 
 inline Optional<Hexagon::ArchEnum> getCpu(StringRef CPU) {
   return StringSwitch<Optional<Hexagon::ArchEnum>>(CPU)
@@ -31,6 +42,9 @@
       .Case("hexagonv67t", Hexagon::ArchEnum::V67)
       .Case("hexagonv68", Hexagon::ArchEnum::V68)
       .Case("hexagonv69", Hexagon::ArchEnum::V69)
+      .Case("hexagonv71", Hexagon::ArchEnum::V71)
+      .Case("hexagonv71t", Hexagon::ArchEnum::V71)
+      .Case("hexagonv73", Hexagon::ArchEnum::V73)
       .Default(None);
 }
 } // namespace Hexagon
Index: llvm/lib/Target/Hexagon/HexagonDepArch.td
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepArch.td
+++ llvm/lib/Target/Hexagon/HexagonDepArch.td
@@ -26,3 +26,7 @@
 def HasV68 : Predicate<"HST->hasV68Ops()">, AssemblerPredicate<(all_of ArchV68)>;
 def ArchV69: SubtargetFeature<"v69", "HexagonArchVersion", "Hexagon::ArchEnum::V69", "Enable Hexagon V69 architecture">;
 def HasV69 : Predicate<"HST->hasV69Ops()">, AssemblerPredicate<(all_of ArchV69)>;
+def ArchV71: SubtargetFeature<"v71", "HexagonArchVersion", "Hexagon::ArchEnum::V71", "Enable Hexagon V71 architecture">;
+def HasV71 : Predicate<"HST->hasV71Ops()">, AssemblerPredicate<(all_of ArchV71)>;
+def ArchV73: SubtargetFeature<"v73", "HexagonArchVersion", "Hexagon::ArchEnum::V73", "Enable Hexagon V73 architecture">;
+def HasV73 : Predicate<"HST->hasV73Ops()">, AssemblerPredicate<(all_of ArchV73)>;
Index: llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
+++ llvm/lib/Target/Hexagon/HexagonDepIICHVX.td
@@ -8,6 +8,7 @@
 // Automatically generated file, do not edit!
//===----------------------------------------------------------------------===// +def tc_0390c1ca : InstrItinClass; def tc_04da405a : InstrItinClass; def tc_05ca8cfd : InstrItinClass; def tc_08a4f1b6 : InstrItinClass; @@ -22,7 +23,7 @@ def tc_1ad8a370 : InstrItinClass; def tc_1ba8a0cd : InstrItinClass; def tc_20a4bbec : InstrItinClass; -def tc_2120355e : InstrItinClass; +def tc_227864f7 : InstrItinClass; def tc_257f6f7c : InstrItinClass; def tc_26a377fe : InstrItinClass; def tc_2b4c548e : InstrItinClass; @@ -44,6 +45,7 @@ def tc_4942646a : InstrItinClass; def tc_51d0ecc3 : InstrItinClass; def tc_52447ecc : InstrItinClass; +def tc_531b383c : InstrItinClass; def tc_540c3da3 : InstrItinClass; def tc_54a0dc47 : InstrItinClass; def tc_561aaa58 : InstrItinClass; @@ -75,6 +77,8 @@ def tc_90bcc1db : InstrItinClass; def tc_933f2b39 : InstrItinClass; def tc_946013d8 : InstrItinClass; +def tc_9a1cab75 : InstrItinClass; +def tc_9aff7a2a : InstrItinClass; def tc_9d1dc972 : InstrItinClass; def tc_9f363d21 : InstrItinClass; def tc_a02a10a8 : InstrItinClass; @@ -83,7 +87,6 @@ def tc_a28f32b5 : InstrItinClass; def tc_a69eeee1 : InstrItinClass; def tc_a7e6707d : InstrItinClass; -def tc_aa047364 : InstrItinClass; def tc_ab23f776 : InstrItinClass; def tc_abe8c3b2 : InstrItinClass; def tc_ac4046bc : InstrItinClass; @@ -106,6 +109,7 @@ def tc_dd5b0695 : InstrItinClass; def tc_df80eeb0 : InstrItinClass; def tc_e2d2e9e5 : InstrItinClass; +def tc_e2fdd6e6 : InstrItinClass; def tc_e35c1e93 : InstrItinClass; def tc_e3f68a46 : InstrItinClass; def tc_e675c45a : InstrItinClass; @@ -117,6 +121,13 @@ class DepHVXItinV55 { list DepHVXItinV55_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -192,9 +203,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -306,6 +320,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -467,6 +485,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -513,10 +545,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -635,6 +663,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -681,6 +713,13 @@ class DepHVXItinV60 { list DepHVXItinV60_list = [ + InstrItinData , + 
InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -756,9 +795,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -870,6 +912,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -1031,6 +1077,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -1077,10 +1137,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1199,6 +1255,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -1245,6 +1305,13 @@ class DepHVXItinV62 { list DepHVXItinV62_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -1320,9 +1387,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -1434,6 +1504,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -1595,6 +1669,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -1641,10 +1729,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1763,6 +1847,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, 
+ InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -1809,6 +1897,13 @@ class DepHVXItinV65 { list DepHVXItinV65_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -1884,9 +1979,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -1998,6 +2096,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -2159,6 +2261,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -2205,10 +2321,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2327,6 +2439,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -2373,6 +2489,13 @@ class DepHVXItinV66 { list DepHVXItinV66_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -2448,9 +2571,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -2562,6 +2688,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -2723,6 +2853,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -2769,10 +2913,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, 
Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2891,6 +3031,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -2937,6 +3081,13 @@ class DepHVXItinV67 { list DepHVXItinV67_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -3012,9 +3163,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -3126,6 +3280,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -3287,6 +3445,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -3333,10 +3505,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -3455,6 +3623,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -3501,6 +3673,13 @@ class DepHVXItinV68 { list DepHVXItinV68_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -3576,9 +3755,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -3690,6 +3872,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -3851,6 +4037,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, 
CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -3897,10 +4097,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -4019,6 +4215,10 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], @@ -4065,6 +4265,13 @@ class DepHVXItinV69 { list DepHVXItinV69_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 5], @@ -4140,9 +4347,12 @@ InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7], - [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, InstrItinData , @@ -4254,6 +4464,10 @@ InstrStage<1, [CVI_LD]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], @@ -4415,6 +4629,20 @@ InstrStage<1, [CVI_XLANE]>], [9, 5], [HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], @@ -4461,10 +4689,6 @@ InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData ], [9, 7, 7], - [HVX_FWD, HVX_FWD, HVX_FWD]>, - InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -4583,6 +4807,1194 @@ InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV71 { + list DepHVXItinV71_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, 
Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + 
+ InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData 
, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + 
[HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData ], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV73 { + list DepHVXItinV73_list = [ + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], 
[9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, 
CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [9, 3, 1, 2], + [HVX_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE], 0>, + InstrStage<1, [CVI_MPY01]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + 
InstrItinData , + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData ], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], Index: llvm/lib/Target/Hexagon/HexagonDepIICScalar.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -38,11 +38,13 @@ def tc_2471c1c8 : InstrItinClass; def tc_24e109c7 : InstrItinClass; def tc_24f426ab : InstrItinClass; +def tc_27106296 : InstrItinClass; def tc_280f7fe1 : InstrItinClass; def tc_28e55c6f : InstrItinClass; def tc_2c13e7f5 : InstrItinClass; def tc_2c3e17fc : InstrItinClass; def tc_2f573607 : InstrItinClass; +def tc_33e7e673 : InstrItinClass; def tc_362b0be2 : InstrItinClass; def tc_38382228 : InstrItinClass; def tc_388f9897 : InstrItinClass; @@ -107,6 +109,7 @@ def tc_7b9187d3 : InstrItinClass; def tc_7c31e19a : InstrItinClass; def tc_7c6d32e4 : InstrItinClass; +def tc_7dc63b5c : InstrItinClass; def tc_7f7f45f5 : InstrItinClass; def tc_7f8ae742 : InstrItinClass; def tc_8035e91f : InstrItinClass; @@ -120,12 +123,14 @@ def tc_8a825db2 : InstrItinClass; def tc_8b5bd4f5 : InstrItinClass; def tc_8e82e8ca : InstrItinClass; +def tc_8f36a2fd : InstrItinClass; def tc_9124c04f : InstrItinClass; def tc_92240447 : InstrItinClass; def tc_934753bb : InstrItinClass; def tc_937dd41c : InstrItinClass; def tc_9406230a : InstrItinClass; def tc_95a33176 : InstrItinClass; +def tc_95f43c5e : InstrItinClass; def tc_96ef76ef : InstrItinClass; def tc_975a4e54 : InstrItinClass; def tc_9783714b : InstrItinClass; @@ -155,6 +160,7 @@ def tc_addc37a8 : InstrItinClass; def tc_ae5babd7 : InstrItinClass; def tc_aee6250c : InstrItinClass; +def tc_af6af259 : InstrItinClass; def tc_b1ae5f67 : InstrItinClass; def tc_b4dc7630 : InstrItinClass; def tc_b7c4062a : InstrItinClass; @@ -183,6 +189,7 @@ def tc_decdde8a : InstrItinClass; def tc_df5d53f9 : InstrItinClass; def tc_e3d699e3 : InstrItinClass; +def tc_e60def48 : InstrItinClass; def tc_e9170fb7 : InstrItinClass; def tc_ed03645c : InstrItinClass; def tc_eed07714 : InstrItinClass; @@ -196,6 +203,7 @@ def tc_f529831b : InstrItinClass; def tc_f6e2aff9 : InstrItinClass; def tc_f7569068 : InstrItinClass; +def tc_f97707c1 : InstrItinClass; def tc_f999c66e : InstrItinClass; def tc_fae9dfa5 : InstrItinClass; def tc_fedb7e19 : InstrItinClass; @@ -232,11 +240,13 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -301,6 +311,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -314,12 +325,14 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -349,6 +362,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, 
InstrItinData ]>, @@ -377,6 +391,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, @@ -390,6 +405,7 @@ InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, + InstrItinData ]>, InstrItinData ]>, InstrItinData ]>, InstrItinData ]> ]; @@ -517,6 +533,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -537,6 +557,10 @@ [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -793,6 +817,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 4, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -845,6 +873,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -869,6 +901,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -985,6 +1021,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -1097,6 +1137,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [3, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -1149,6 +1193,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 4, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -1285,6 +1333,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1305,6 +1357,10 @@ [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -1561,6 +1617,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1613,6 +1673,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -1637,6 +1701,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -1753,6 +1821,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -1865,6 +1937,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -1917,6 +1993,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -2058,6 +2138,10 @@ InstrStage<1, [CVI_ST]>], [1, 2, 2], 
[Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2079,6 +2163,11 @@ InstrStage<1, [CVI_ST]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [], + []>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1], @@ -2348,6 +2437,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2400,6 +2493,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -2424,6 +2521,11 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2543,6 +2645,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -2660,6 +2766,11 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -2716,6 +2827,11 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_ST]>], [2], + [Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -2853,6 +2969,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -2873,6 +2993,10 @@ [InstrStage<1, [SLOT2]>], [2, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -3129,6 +3253,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3181,6 +3309,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -3205,6 +3337,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3321,6 +3457,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -3433,6 +3573,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [4, 2], [Hex_FWD, Hex_FWD]>, @@ -3485,6 +3629,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -3621,6 +3769,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3641,6 +3793,10 @@ [InstrStage<1, 
[SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -3897,6 +4053,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -3949,6 +4109,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -3973,6 +4137,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4089,6 +4257,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -4201,6 +4373,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -4253,6 +4429,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -4389,6 +4569,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4409,6 +4593,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -4665,6 +4853,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4717,6 +4909,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -4741,6 +4937,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -4857,6 +5057,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -4969,6 +5173,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -5021,6 +5229,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -5157,6 +5369,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5177,6 +5393,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -5433,6 +5653,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5485,6 +5709,10 @@ 
[InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -5509,6 +5737,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5625,6 +5857,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -5737,6 +5973,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -5789,6 +6029,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -5925,6 +6169,10 @@ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -5945,6 +6193,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -6201,6 +6453,10 @@ [InstrStage<1, [SLOT0]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -6253,6 +6509,10 @@ [InstrStage<1, [SLOT0]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -6277,6 +6537,10 @@ [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -6393,6 +6657,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -6505,6 +6773,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -6557,6 +6829,10 @@ [InstrStage<1, [SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -6693,6 +6969,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -6713,6 +6993,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -6969,6 +7253,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7021,6 +7309,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -7045,6 +7337,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + 
InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7161,6 +7457,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -7273,6 +7573,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -7325,6 +7629,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, @@ -7461,6 +7769,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [2, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7481,6 +7793,10 @@ [InstrStage<1, [SLOT2]>], [2, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [], + []>, + InstrItinData ], [1], [Hex_FWD]>, @@ -7737,6 +8053,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + InstrItinData ], [5, 5, 1], [Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7789,6 +8109,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [3, 1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [3, 2], [Hex_FWD, Hex_FWD]>, @@ -7813,6 +8137,10 @@ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>], [4, 2, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], + [Hex_FWD]>, + InstrItinData ], [1, 1, 2, 3], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, @@ -7929,6 +8257,10 @@ [InstrStage<1, [SLOT0, SLOT1]>], [4, 1], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [1], [Hex_FWD]>, @@ -8041,6 +8373,10 @@ [InstrStage<1, [SLOT2, SLOT3]>], [4, 2], [Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [4, 1], [Hex_FWD, Hex_FWD]>, @@ -8093,6 +8429,2410 @@ [InstrStage<1, [SLOT2, SLOT3]>], [5, 5, 1, 1], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV71 { + list DepScalarItinV71_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, 
Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV71T { + list DepScalarItinV71T_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 
2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData 
], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} + +class DepScalarItinV73 { + list DepScalarItinV73_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, 
+ + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], 
[4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, 
Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + InstrItinData ], [2, 2], [Hex_FWD, Hex_FWD]>, Index: llvm/lib/Target/Hexagon/HexagonDepITypes.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepITypes.h +++ llvm/lib/Target/Hexagon/HexagonDepITypes.h @@ -8,7 +8,6 @@ // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// - #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPITYPES_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPITYPES_H Index: llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td +++ llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td @@ -44,6 +44,14 @@ let Inst{13-13} = n1{1-1}; let Inst{8-8} = n1{0-0}; } +class Enc_046afa : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vss32; + let Inst{4-0} = Vss32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_04c959 : OpcodeHexagon { bits <2> Ii; let Inst{13-13} = Ii{1-1}; @@ -898,6 +906,10 @@ bits <5> Vxx32; let Inst{4-0} = Vxx32{4-0}; } +class Enc_403871 : OpcodeHexagon { + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_405228 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -1341,6 +1353,14 @@ bits <5> Rdd32; let Inst{4-0} = Rdd32{4-0}; } +class Enc_5eb169 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_607661 : OpcodeHexagon { bits <6> Ii; let Inst{12-7} = Ii{5-0}; @@ -1394,6 +1414,15 @@ bits <5> Rt32; let Inst{4-0} = Rt32{4-0}; } +class Enc_634460 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; +} class Enc_63eaeb : OpcodeHexagon { bits <2> Ii; let Inst{1-0} = Ii{1-0}; @@ -1771,6 +1800,14 @@ let Inst{25-22} = n1{4-1}; let Inst{13-13} = n1{0-0}; } +class Enc_80296d : OpcodeHexagon { + bits <5> Rs32; + let Inst{12-8} = Rs32{4-0}; + bits <5> Rtt32; + let Inst{20-16} = Rtt32{4-0}; + bits <5> Rd32; + let Inst{4-0} = Rd32{4-0}; +} class Enc_802dc0 : OpcodeHexagon { bits <1> Ii; let Inst{8-8} = Ii{0-0}; @@ -1791,6 +1828,14 @@ bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; } +class Enc_829a68 : OpcodeHexagon { + bits <1> Mu2; + let Inst{13-13} = Mu2{0-0}; + bits <5> Vdd32; + let Inst{4-0} = Vdd32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_830e5d : OpcodeHexagon { bits <8> Ii; let Inst{12-5} = Ii{7-0}; @@ -2481,6 +2526,14 @@ bits <5> Rtt32; let Inst{12-8} = Rtt32{4-0}; } +class Enc_b025d6 : OpcodeHexagon { + bits <3> Ii; + let Inst{10-8} = Ii{2-0}; + bits <5> Vss32; + let Inst{4-0} = Vss32{4-0}; + bits <5> Rx32; + let Inst{20-16} = Rx32{4-0}; +} class Enc_b05839 : OpcodeHexagon { bits <7> Ii; let Inst{8-5} = Ii{6-3}; @@ -2672,6 +2725,15 @@ bits <5> Rx32; let Inst{20-16} = Rx32{4-0}; } +class Enc_b98b95 : OpcodeHexagon { + bits <4> Ii; + let Inst{13-13} = Ii{3-3}; + let Inst{10-8} = Ii{2-0}; + bits <5> Rt32; + let Inst{20-16} = Rt32{4-0}; + bits <5> Vss32; + let Inst{4-0} = Vss32{4-0}; +} class Enc_b9c5fb : OpcodeHexagon { bits <5> Rss32; let Inst{20-16} = Rss32{4-0}; @@ -2734,6 +2796,12 @@ bits <5> 
Rdd32;
  let Inst{4-0} = Rdd32{4-0};
}
+class Enc_bea5da : OpcodeHexagon {
+  bits <10> Ii;
+  let Inst{17-16} = Ii{9-8};
+  let Inst{12-8} = Ii{7-3};
+  let Inst{4-2} = Ii{2-0};
+}
class Enc_bfbf03 : OpcodeHexagon {
  bits <2> Qs4;
  let Inst{9-8} = Qs4{1-0};
@@ -2829,6 +2897,14 @@
  bits <5> Rd32;
  let Inst{4-0} = Rd32{4-0};
}
+class Enc_c89067 : OpcodeHexagon {
+  bits <5> Rtt32;
+  let Inst{20-16} = Rtt32{4-0};
+  bits <5> Rdd32;
+  let Inst{4-0} = Rdd32{4-0};
+  bits <5> Rx32;
+  let Inst{12-8} = Rx32{4-0};
+}
class Enc_c90aca : OpcodeHexagon {
  bits <8> Ii;
  let Inst{12-5} = Ii{7-0};
@@ -2954,6 +3030,11 @@
  bits <5> Rx32;
  let Inst{20-16} = Rx32{4-0};
}
+class Enc_d0fe02 : OpcodeHexagon {
+  bits <5> Rxx32;
+  let Inst{20-16} = Rxx32{4-0};
+  bits <0> sgp10;
+}
class Enc_d15d19 : OpcodeHexagon {
  bits <1> Mu2;
  let Inst{13-13} = Mu2{0-0};
@@ -3538,6 +3619,14 @@
  bits <5> Rd32;
  let Inst{4-0} = Rd32{4-0};
}
+class Enc_fc4562 : OpcodeHexagon {
+  bits <5> Rs32;
+  let Inst{12-8} = Rs32{4-0};
+  bits <5> Rtt32;
+  let Inst{20-16} = Rtt32{4-0};
+  bits <5> Rdd32;
+  let Inst{4-0} = Rdd32{4-0};
+}
class Enc_fcf7a7 : OpcodeHexagon {
  bits <5> Rss32;
  let Inst{20-16} = Rss32{4-0};
Index: llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
===================================================================
--- llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -4959,6 +4959,18 @@
let hasSideEffects = 1;
let isTaken = Inst{12};
}
+def J2_callrh : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"callrh $Rs32",
+tc_95f43c5e, TypeJ>, Enc_ecbcc8, Requires<[HasV73]> {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010000110;
+let isCall = 1;
+let prefersSlot3 = 1;
+let cofMax1 = 1;
+let Defs = [PC, R31];
+}
def J2_callrt : HInst<
(outs),
(ins PredRegs:$Pu4, IntRegs:$Rs32),
@@ -5308,6 +5320,19 @@
let Defs = [PC];
let isTaken = Inst{12};
}
+def J2_jumprh : HInst<
+(outs),
+(ins IntRegs:$Rs32),
+"jumprh $Rs32",
+tc_f97707c1, TypeJ>, Enc_ecbcc8, Requires<[HasV73]> {
+let Inst{13-0} = 0b00000000000000;
+let Inst{31-21} = 0b01010010110;
+let isTerminator = 1;
+let isIndirectBranch = 1;
+let isBranch = 1;
+let cofMax1 = 1;
+let Defs = [PC];
+}
def J2_jumprltez : HInst<
(outs),
(ins IntRegs:$Rs32, b13_2Imm:$Ii),
@@ -5680,13 +5705,13 @@
}
def J2_pause : HInst<
(outs),
-(ins u8_0Imm:$Ii),
+(ins u10_0Imm:$Ii),
"pause(#$Ii)",
-tc_d57d649c, TypeJ>, Enc_a51a9a {
+tc_d57d649c, TypeJ>, Enc_bea5da {
let Inst{1-0} = 0b00;
let Inst{7-5} = 0b000;
let Inst{13-13} = 0b0;
-let Inst{31-16} = 0b0101010001000000;
+let Inst{31-18} = 0b01010100010000;
let isSolo = 1;
}
def J2_ploop1si : HInst<
@@ -5838,6 +5863,15 @@
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def J2_unpause : HInst<
+(outs),
+(ins),
+"unpause",
+tc_33e7e673, TypeJ>, Enc_e3b0c4, Requires<[HasV73]> {
+let Inst{13-0} = 0b01000000000000;
+let Inst{31-16} = 0b0101011111100000;
+let isSolo = 1;
+}
def J4_cmpeq_f_jumpnv_nt : HInst<
(outs),
(ins IntRegs:$Ns8, IntRegs:$Rt32, b30_2Imm:$Ii),
@@ -8494,7 +8528,7 @@
(outs),
(ins IntRegs:$Rs32),
"hintjr($Rs32)",
-tc_60e324ff, TypeJ>, Enc_ecbcc8 {
+tc_e60def48, TypeJ>, Enc_ecbcc8 {
let Inst{13-0} = 0b00000000000000;
let Inst{31-21} = 0b01010010101;
let isTerminator = 1;
@@ -13775,6 +13809,18 @@
let isPseudo = 1;
let isCodeGenOnly = 1;
}
+def L6_linecpy : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rdd32 = linecpy($Rs32,$Rtt32)",
+tc_8f36a2fd, TypeLD>, Enc_fc4562, Requires<[HasV73]> {
+let Inst{7-5} = 0b001;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10011001111;
+let mayLoad = 1;
+let isSolo = 1;
+let mayStore = 1;
+}
def L6_memcpy : HInst<
(outs),
(ins IntRegs:$Rs32, IntRegs:$Rt32, ModRegs:$Mu2),
@@ -13786,6 +13832,33 @@
let isSolo = 1;
let mayStore = 1;
}
+def L6_movlen : HInst<
+(outs IntRegs:$Rd32),
+(ins IntRegs:$Rs32, DoubleRegs:$Rtt32),
+"$Rd32 = movlen($Rs32,$Rtt32)",
+tc_5a4b5e58, TypeCR>, Enc_80296d, Requires<[HasV73]> {
+let Inst{7-5} = 0b010;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b01101111111;
+let hasNewValue = 1;
+let opNewValue = 0;
+let isSolo = 1;
+}
+def L6_pmemcpy : HInst<
+(outs DoubleRegs:$Rdd32, IntRegs:$Rx32),
+(ins IntRegs:$Rx32in, DoubleRegs:$Rtt32),
+"$Rdd32 = pmemcpy($Rx32,$Rtt32)",
+tc_af6af259, TypeLD>, Enc_c89067, Requires<[HasV73]> {
+let Inst{7-5} = 0b000;
+let Inst{13-13} = 0b0;
+let Inst{31-21} = 0b10011001111;
+let hasNewValue = 1;
+let opNewValue = 1;
+let mayLoad = 1;
+let isSolo = 1;
+let mayStore = 1;
+let Constraints = "$Rx32 = $Rx32in";
+}
def L6_return_map_to_raw : HInst<
(outs),
(ins),
@@ -26741,6 +26814,31 @@
let DecoderNamespace = "EXT_mmvec";
let Constraints = "$Qx4 = $Qx4in";
}
+def V6_dbl_ld0 : HInst<
+(outs HvxWR:$Vdd32),
+(ins IntRegs:$Rt32),
+"$Vdd32 = vmem($Rt32)",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV73]> {
+let hasNewValue = 1;
+let opNewValue = 0;
+let isCVLoad = 1;
+let isCVI = 1;
+let mayLoad = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
+def V6_dbl_st0 : HInst<
+(outs),
+(ins IntRegs:$Rt32, HvxWR:$Vss32),
+"vmem($Rt32) = $Vss32",
+PSEUDO, TypeMAPPING>, Requires<[UseHVXV73]> {
+let isCVI = 1;
+let mayStore = 1;
+let isPseudo = 1;
+let isCodeGenOnly = 1;
+let DecoderNamespace = "EXT_mmvec";
+}
def V6_extractw : HInst<
(outs IntRegs:$Rd32),
(ins HvxVR:$Vu32, IntRegs:$Rs32),
@@ -26752,6 +26850,8 @@
let hasNewValue = 1;
let opNewValue = 0;
let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
let isSolo = 1;
let mayLoad = 1;
let DecoderNamespace = "EXT_mmvec";
@@ -27045,6 +27145,8 @@
let hasNewValue = 1;
let opNewValue = 0;
let isCVI = 1;
+let isHVXALU = 1;
+let isHVXALU2SRC = 1;
let DecoderNamespace = "EXT_mmvec";
}
def V6_pred_or : HInst<
@@ -27470,6 +27572,7 @@
let accessSize = HVXVectorAccess;
let isCVLoad = 1;
let isCVI = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_ai";
@@ -27493,6 +27596,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ai";
@@ -27516,6 +27620,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ai";
@@ -27538,6 +27643,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_pi";
@@ -27560,6 +27666,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ppu";
@@ -27581,6 +27688,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_pi";
@@ -27603,6 +27711,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ppu";
@@ -27625,6 +27734,7 @@
let isCVLoad = 1;
let isCVI = 1;
let CVINew = 1;
+let isHVXALU = 1;
let mayLoad = 1;
let isRestrictNoSlot1Store = 1;
let BaseOpcode = "V6_vL32b_cur_ai";
@@ -27646,6
+27756,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_pi"; @@ -27667,6 +27778,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ppu"; @@ -27688,6 +27800,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; @@ -27709,6 +27822,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; @@ -27730,6 +27844,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; @@ -27750,6 +27865,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27774,6 +27890,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27798,6 +27915,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27821,6 +27939,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27844,6 +27963,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27866,6 +27986,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27889,6 +28010,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27912,6 +28034,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27934,6 +28057,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27956,6 +28080,7 @@ let isCVLoad = 1; let isCVI = 1; let CVINew = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -27978,6 +28103,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28000,6 +28126,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28022,6 +28149,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28043,6 +28171,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28066,6 +28195,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28089,6 +28219,7 @@ let accessSize = HVXVectorAccess; let 
isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28110,6 +28241,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28131,6 +28263,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28356,6 +28489,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; @@ -28378,6 +28512,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; @@ -28400,6 +28535,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; @@ -28420,6 +28556,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; @@ -28440,6 +28577,7 @@ let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ppu"; @@ -28641,6 +28779,64 @@ let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } +def V6_vL64b_ai : HInst< +(outs HvxWR:$Vdd32), +(ins IntRegs:$Rt32, s4_0Imm:$Ii), +"$Vdd32 = vmem($Rt32+#$Ii)", +tc_0390c1ca, TypeCVI_VM_LD>, Enc_634460, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b011; +let Inst{12-11} = 0b00; +let Inst{31-21} = 0b00101000010; +let hasNewValue = 1; +let opNewValue = 0; +let addrMode = BaseImmOffset; +let accessSize = HVXVectorAccess; +let isCVLoad = 1; +let isCVI = 1; +let isHVXALU = 1; +let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vL64b_pi : HInst< +(outs HvxWR:$Vdd32, IntRegs:$Rx32), +(ins IntRegs:$Rx32in, s3_0Imm:$Ii), +"$Vdd32 = vmem($Rx32++#$Ii)", +tc_9a1cab75, TypeCVI_VM_LD>, Enc_5eb169, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b011; +let Inst{13-11} = 0b000; +let Inst{31-21} = 0b00101001010; +let hasNewValue = 1; +let opNewValue = 0; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVLoad = 1; +let isCVI = 1; +let isHVXALU = 1; +let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} +def V6_vL64b_ppu : HInst< +(outs HvxWR:$Vdd32, IntRegs:$Rx32), +(ins IntRegs:$Rx32in, ModRegs:$Mu2), +"$Vdd32 = vmem($Rx32++$Mu2)", +tc_9a1cab75, TypeCVI_VM_LD>, Enc_829a68, Requires<[UseHVXV73]> { +let Inst{12-5} = 0b00000011; +let Inst{31-21} = 0b00101011010; +let hasNewValue = 1; +let opNewValue = 0; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVLoad = 1; +let isCVI = 1; +let isHVXALU = 1; +let mayLoad = 1; +let isRestrictNoSlot1Store = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} def V6_vS32Ub_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), @@ -28802,6 +28998,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let CextOpcode = "V6_vS32b"; @@ -29007,6 +29204,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 
1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isNVStorable = 1; @@ -29025,6 +29223,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isNVStorable = 1; @@ -29043,6 +29242,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; @@ -29059,6 +29259,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -29073,6 +29274,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29087,6 +29289,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29102,6 +29305,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; @@ -29317,6 +29521,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; @@ -29336,6 +29541,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; @@ -29355,6 +29561,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; @@ -29372,6 +29579,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29387,6 +29595,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29402,6 +29611,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29418,6 +29628,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; @@ -29437,6 +29648,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; @@ -29456,6 +29668,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; @@ -29474,6 +29687,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; @@ -29492,6 +29706,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; @@ -29509,6 +29724,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace 
= "EXT_mmvec"; @@ -29524,6 +29740,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29539,6 +29756,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let isNonTemporal = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29555,6 +29773,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let CextOpcode = "V6_vS32b"; @@ -29573,6 +29792,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; @@ -29591,6 +29811,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; let isNVStorable = 1; @@ -29608,6 +29829,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; let isNVStorable = 1; @@ -29625,6 +29847,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ppu"; let isNVStorable = 1; @@ -29641,6 +29864,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -29655,6 +29879,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29669,6 +29894,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVI = 1; +let isHVXALU = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -29719,6 +29945,52 @@ let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } +def V6_vS64b_ai : HInst< +(outs), +(ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxWR:$Vss32), +"vmem($Rt32+#$Ii) = $Vss32", +tc_9aff7a2a, TypeCVI_VM_ST>, Enc_b98b95, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b010; +let Inst{12-11} = 0b00; +let Inst{31-21} = 0b00101000011; +let addrMode = BaseImmOffset; +let accessSize = HVXVectorAccess; +let isCVI = 1; +let isHVXALU = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vS64b_pi : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxWR:$Vss32), +"vmem($Rx32++#$Ii) = $Vss32", +tc_227864f7, TypeCVI_VM_ST>, Enc_b025d6, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b010; +let Inst{13-11} = 0b000; +let Inst{31-21} = 0b00101001011; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVI = 1; +let isHVXALU = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} +def V6_vS64b_ppu : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in, ModRegs:$Mu2, HvxWR:$Vss32), +"vmem($Rx32++$Mu2) = $Vss32", +tc_227864f7, TypeCVI_VM_ST>, Enc_046afa, Requires<[UseHVXV73]> { +let Inst{12-5} = 0b00000010; +let Inst{31-21} = 0b00101011011; +let addrMode = PostInc; +let accessSize = HVXVectorAccess; +let isCVI = 1; +let isHVXALU = 1; +let mayStore = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Rx32 = $Rx32in"; +} def V6_vabs_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -29756,6 +30028,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let 
isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsb_alt : HInst< @@ -29781,6 +30055,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsb_sat_alt : HInst< @@ -29906,6 +30182,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsh_alt : HInst< @@ -29931,6 +30209,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsh_sat_alt : HInst< @@ -29992,6 +30272,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsw_alt : HInst< @@ -30017,6 +30299,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vabsw_sat_alt : HInst< @@ -30122,6 +30406,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vadd_sf_bf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vadd($Vu32.bf,$Vv32.bf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vadd_sf_hf : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -30159,6 +30456,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddb_alt : HInst< @@ -30211,6 +30510,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30241,6 +30542,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30269,6 +30572,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddbsat_alt : HInst< @@ -30319,6 +30624,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -30332,9 +30639,9 @@ let Inst{31-21} = 0b00011101101; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddcarrysat : HInst< @@ -30348,6 +30655,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddclbh : HInst< @@ -30387,6 +30696,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddh_alt : HInst< @@ -30439,6 +30750,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30469,6 +30782,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let 
Constraints = "$Vx32 = $Vx32in"; } @@ -30497,6 +30812,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddhsat_alt : HInst< @@ -30655,6 +30972,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddubsat_alt : HInst< @@ -30705,6 +31024,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vadduhsat : HInst< @@ -30718,6 +31039,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vadduhsat_alt : HInst< @@ -30822,6 +31145,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vadduwsat_alt : HInst< @@ -30872,6 +31197,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddw_alt : HInst< @@ -30924,6 +31251,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30954,6 +31283,8 @@ let opNewValue = 0; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -30982,6 +31313,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vaddwsat_alt : HInst< @@ -31057,6 +31390,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vandnqrt : HInst< @@ -31179,6 +31514,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vandvqv : HInst< @@ -31193,6 +31530,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vandvrt : HInst< @@ -31624,6 +31963,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrvuhubsat : HInst< @@ -31637,6 +31977,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrvwuhrndsat : HInst< @@ -31650,6 +31991,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrvwuhsat : HInst< @@ -31663,6 +32005,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let hasUnaryRestriction = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vasrw : HInst< @@ -31820,6 +32163,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vassign_fp : HInst< @@ -31839,7 +32183,7 @@ (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), "$Vd32.tmp = $Vu32", -tc_2120355e, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV69]> { +tc_e2fdd6e6, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV69]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-16} = 0b0001111000000001; @@ -31871,6 +32215,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let 
isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgb_alt : HInst< @@ -31896,6 +32242,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgbrnd_alt : HInst< @@ -31921,6 +32269,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgh_alt : HInst< @@ -31946,6 +32296,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavghrnd_alt : HInst< @@ -31971,6 +32323,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgub_alt : HInst< @@ -31996,6 +32350,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgubrnd_alt : HInst< @@ -32021,6 +32377,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguh_alt : HInst< @@ -32046,6 +32404,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguhrnd_alt : HInst< @@ -32071,6 +32431,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguw_alt : HInst< @@ -32096,6 +32458,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavguwrnd_alt : HInst< @@ -32121,6 +32485,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgw_alt : HInst< @@ -32146,6 +32512,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vavgwrnd_alt : HInst< @@ -32236,6 +32604,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vcombine : HInst< @@ -32256,7 +32625,7 @@ (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vdd32.tmp = vcombine($Vu32,$Vv32)", -tc_aa047364, TypeCVI_VX>, Enc_71bb9b, Requires<[UseHVXV69]> { +tc_531b383c, TypeCVI_VX>, Enc_71bb9b, Requires<[UseHVXV69]> { let Inst{7-5} = 0b111; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011110101; @@ -32266,6 +32635,32 @@ let hasHvxTmp = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vconv_h_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.h = $Vu32.hf", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_hf_h : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = $Vu32.h", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vconv_hf_qf16 : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -32305,6 +32700,32 @@ let isCVI = 1; let DecoderNamespace = 
"EXT_mmvec"; } +def V6_vconv_sf_w : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = $Vu32.w", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_w_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = $Vu32.sf", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV73]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vcvt_b_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -32318,6 +32739,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vcvt_bf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.bf = vcvt($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vcvt_h_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -33155,6 +33589,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_veqb_and : HInst< @@ -33166,6 +33602,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33179,6 +33617,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33191,6 +33631,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33205,6 +33647,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_veqh_and : HInst< @@ -33216,6 +33660,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33229,6 +33675,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33241,6 +33689,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33255,6 +33705,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_veqw_and : HInst< @@ -33266,6 +33718,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33279,6 +33733,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33291,6 +33747,8 @@ 
let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33384,6 +33842,7 @@ let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33400,6 +33859,7 @@ let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33448,6 +33908,7 @@ let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33464,6 +33925,7 @@ let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; +let isHVXALU = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33479,6 +33941,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtb_and : HInst< @@ -33490,6 +33954,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33503,6 +33969,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33515,6 +33983,66 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtbf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b011110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgtbf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b110100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtbf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b001110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtbf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.gt($Vu32.bf,$Vv32.bf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV73,UseHVXQFloat]> { +let Inst{7-2} = 0b111100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33529,6 +34057,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let 
isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgth_and : HInst< @@ -33540,6 +34070,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33553,6 +34085,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33565,6 +34099,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33579,6 +34115,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgthf_and : HInst< @@ -33590,6 +34128,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33603,6 +34143,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33615,6 +34157,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33629,6 +34173,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtsf_and : HInst< @@ -33640,6 +34186,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33653,6 +34201,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33665,6 +34215,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33679,6 +34231,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtub_and : HInst< @@ -33690,6 +34244,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33703,6 +34259,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33715,6 +34273,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33729,6 +34289,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtuh_and : HInst< @@ -33740,6 +34302,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } 
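The V73 conversions and bf16 compares defined above are ordinary one- and two-operand HVX instructions, so once the builtin-to-intrinsic plumbing in this patch is in place they are reachable straight from C. A minimal sketch for the single-vector conversions, assuming a 128-byte HVX configuration (e.g. -mcpu=hexagonv73 -mhvx -mhvx-length=128b); the typedef and the two helper names below are illustrative only and are not part of this patch:

/* Illustrative only: mirrors the 128-byte HVX vector type the Hexagon
   headers provide, kept local so the example is self-contained. */
typedef int HVX_Vector __attribute__((__vector_size__(128)));

/* $Vd32.w = $Vu32.sf -- exercises the V6_vconv_w_sf pattern added above. */
static HVX_Vector conv_sf_to_w(HVX_Vector v_sf) {
  return __builtin_HEXAGON_V6_vconv_w_sf_128B(v_sf);
}

/* $Vd32.sf = $Vu32.w -- V6_vconv_sf_w, the opposite direction. */
static HVX_Vector conv_w_to_sf(HVX_Vector v_w) {
  return __builtin_HEXAGON_V6_vconv_sf_w_128B(v_w);
}

The bf16 compares (V6_vgtbf and its and/or/xor accumulating forms) follow the same two-source shape but write a predicate register, so in practice their results are consumed through vmux or masked stores rather than read back directly.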
@@ -33753,6 +34317,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33765,6 +34331,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33779,6 +34347,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtuw_and : HInst< @@ -33790,6 +34360,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33803,6 +34375,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33815,6 +34389,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33829,6 +34405,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vgtw_and : HInst< @@ -33840,6 +34418,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33853,6 +34433,8 @@ let Inst{31-21} = 0b00011100100; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -33865,6 +34447,8 @@ let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100100; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -34187,6 +34771,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmax_bf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.bf = vmax($Vu32.bf,$Vv32.bf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmax_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34198,6 +34795,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmax_sf : HInst< @@ -34211,6 +34810,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxb : HInst< @@ -34224,6 +34825,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxb_alt : HInst< @@ -34249,6 +34852,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxh_alt : HInst< @@ -34274,6 +34879,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxub_alt : HInst< @@ -34299,6 +34906,8 @@ let hasNewValue = 1; 
let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxuh_alt : HInst< @@ -34324,6 +34933,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmaxw_alt : HInst< @@ -34338,6 +34949,19 @@ let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmin_bf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.bf = vmin($Vu32.bf,$Vv32.bf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmin_hf : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34349,6 +34973,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vmin_sf : HInst< @@ -34362,6 +34988,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminb : HInst< @@ -34375,6 +35003,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminb_alt : HInst< @@ -34400,6 +35030,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminh_alt : HInst< @@ -34425,6 +35057,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminub_alt : HInst< @@ -34450,6 +35084,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminuh_alt : HInst< @@ -34475,6 +35111,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vminw_alt : HInst< @@ -34929,6 +35567,34 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpy_sf_bf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vmpy($Vu32.bf,$Vv32.bf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_sf_bf_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vxx32.sf += vmpy($Vu32.bf,$Vv32.bf)", +tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} def V6_vmpy_sf_hf : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -36216,6 +36882,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgb : HInst< @@ -36229,6 +36897,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgb_alt : HInst< @@ 
-36254,6 +36924,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgh_alt : HInst< @@ -36279,6 +36951,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgub_alt : HInst< @@ -36304,6 +36978,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnavgw_alt : HInst< @@ -36346,6 +37022,7 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vnormamth : HInst< @@ -36409,6 +37086,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vor : HInst< @@ -36422,6 +37101,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vpackeb : HInst< @@ -37725,6 +38406,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsathub : HInst< @@ -37738,6 +38421,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsathub_alt : HInst< @@ -37763,6 +38448,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsatuwuh_alt : HInst< @@ -37788,6 +38475,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsatwh_alt : HInst< @@ -37836,6 +38525,8 @@ let Inst{31-21} = 0b00101111001; let accessSize = HalfWordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37849,6 +38540,8 @@ let accessSize = HalfWordAccess; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37882,6 +38575,8 @@ let Inst{31-21} = 0b00101111100; let accessSize = HalfWordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37941,6 +38636,8 @@ let Inst{31-21} = 0b00101111001; let accessSize = WordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -37954,6 +38651,8 @@ let accessSize = WordAccess; let isAccumulator = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -38018,6 +38717,8 @@ let Inst{31-21} = 0b00101111100; let accessSize = WordAccess; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -38067,6 +38768,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vshufeh_alt : HInst< @@ -38133,6 +38836,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vshuffeb_alt : HInst< @@ -38183,6 +38888,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; 
} def V6_vshuffob_alt : HInst< @@ -38271,6 +38978,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vshufoh_alt : HInst< @@ -38376,6 +39085,19 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_sf_bf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vsub($Vu32.bf,$Vv32.bf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV73,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011101010; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsub_sf_hf : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -38413,6 +39135,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubb_alt : HInst< @@ -38464,6 +39188,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38492,6 +39218,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38519,6 +39247,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubbsat_alt : HInst< @@ -38569,6 +39299,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } @@ -38582,9 +39314,9 @@ let Inst{31-21} = 0b00011101101; let hasNewValue = 1; let opNewValue = 0; -let hasNewValue2 = 1; -let opNewValue2 = 1; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubh : HInst< @@ -38598,6 +39330,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubh_alt : HInst< @@ -38649,6 +39383,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38677,6 +39413,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -38704,6 +39442,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubhsat_alt : HInst< @@ -38804,6 +39544,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsububsat_alt : HInst< @@ -38854,6 +39596,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubuhsat : HInst< @@ -38867,6 +39611,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubuhsat_alt : HInst< @@ -38942,6 +39688,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubuwsat_alt : HInst< @@ -38992,6 +39740,8 @@ let hasNewValue = 1; 
let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubw_alt : HInst< @@ -39043,6 +39793,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -39071,6 +39823,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } @@ -39098,6 +39852,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vsubwsat_alt : HInst< @@ -39581,6 +40337,8 @@ let hasNewValue = 1; let opNewValue = 0; let isCVI = 1; +let isHVXALU = 1; +let isHVXALU2SRC = 1; let DecoderNamespace = "EXT_mmvec"; } def V6_vzb : HInst< @@ -39774,11 +40532,24 @@ let Inst{31-16} = 0b0110110000100000; let isSolo = 1; } +def Y2_crswap0 : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in), +"crswap($Rx32,sgp0)", +tc_7dc63b5c, TypeCR>, Enc_403871 { +let Inst{13-0} = 0b00000000000000; +let Inst{31-21} = 0b01100101000; +let hasNewValue = 1; +let opNewValue = 0; +let Uses = [SGP0]; +let Defs = [SGP0]; +let Constraints = "$Rx32 = $Rx32in"; +} def Y2_crswap_old : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in), "crswap($Rx32,sgp)", -PSEUDO, TypeMAPPING> { +tc_7dc63b5c, TypeMAPPING> { let hasNewValue = 1; let opNewValue = 0; let isPseudo = 1; @@ -39918,6 +40689,30 @@ let Inst{31-21} = 0b01100100010; let isSolo = 1; } +def Y4_crswap1 : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in), +"crswap($Rx32,sgp1)", +tc_7dc63b5c, TypeCR>, Enc_403871 { +let Inst{13-0} = 0b00000000000000; +let Inst{31-21} = 0b01100101001; +let hasNewValue = 1; +let opNewValue = 0; +let Uses = [SGP1]; +let Defs = [SGP1]; +let Constraints = "$Rx32 = $Rx32in"; +} +def Y4_crswap10 : HInst< +(outs DoubleRegs:$Rxx32), +(ins DoubleRegs:$Rxx32in, sgp10Const:$sgp10), +"crswap($Rxx32,$sgp10)", +tc_27106296, TypeCR>, Enc_d0fe02 { +let Inst{13-0} = 0b00000000000000; +let Inst{31-21} = 0b01101101100; +let Uses = [SGP0, SGP1]; +let Defs = [SGP0, SGP1]; +let Constraints = "$Rxx32 = $Rxx32in"; +} def Y4_l2fetch : HInst< (outs), (ins IntRegs:$Rs32, IntRegs:$Rt32), @@ -40102,7 +40897,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32)", -tc_388f9897, TypeALU32_3op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_3op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40112,7 +40907,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,#$Ii)", -tc_388f9897, TypeALU32_ADDI>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_ADDI>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40127,7 +40922,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = and($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40142,7 +40937,7 @@ (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, s8_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40155,7 +40950,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = 
sxtb($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40165,7 +40960,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40175,7 +40970,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = $Rs32", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40185,7 +40980,7 @@ (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii), "$Rd32 = #$Ii", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40200,7 +40995,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxtb($Rs32)", -PSEUDO, TypeMAPPING>, Requires<[HasV69]> { +PSEUDO, TypeMAPPING>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40210,7 +41005,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -40220,7 +41015,7 @@ (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40233,7 +41028,7 @@ (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rdd32 = combine(#$Ii,$Rs32)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40246,7 +41041,7 @@ (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rdd32 = combine($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40259,7 +41054,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -40276,7 +41071,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -40292,7 +41087,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -40310,7 +41105,7 @@ (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV73]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -40327,7 +41122,7 @@ (outs PredRegs:$Pd4), (ins 
IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.eq($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV73]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -40340,7 +41135,7 @@ (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = deallocframe($Rs32):raw", -tc_aee6250c, TypeLD>, Requires<[HasV69]> { +tc_aee6250c, TypeLD>, Requires<[HasV73]> { let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; let mayLoad = 1; @@ -40352,7 +41147,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memb($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40370,7 +41165,7 @@ (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s29_3Imm:$Ii), "$Rdd32 = memd($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -40386,7 +41181,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40404,7 +41199,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rd32 = memw($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40422,7 +41217,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memub($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40440,7 +41235,7 @@ (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memuh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV69]> { +tc_eed07714, TypeLD>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40458,7 +41253,7 @@ (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u11_3Imm:$Ii), "allocframe($Rx32,#$Ii):raw", -tc_74a42bda, TypeST>, Requires<[HasV69]> { +tc_74a42bda, TypeST>, Requires<[HasV73]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -40474,7 +41269,7 @@ (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -40490,7 +41285,7 @@ (outs), (ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+#$Ii) = $Rtt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -40506,7 +41301,7 @@ (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let AsmVariantName = "NonParsable"; @@ -40522,7 +41317,7 @@ (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV69]> { +tc_a9edeffa, TypeST>, Requires<[HasV73]> { let addrMode = 
BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; @@ -40538,7 +41333,7 @@ (outs), (ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "memb($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -40554,7 +41349,7 @@ (outs), (ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "memw($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV73]> { let addrMode = BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; Index: llvm/lib/Target/Hexagon/HexagonDepInstrIntrinsics.inc =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepInstrIntrinsics.inc +++ llvm/lib/Target/Hexagon/HexagonDepInstrIntrinsics.inc @@ -1015,6 +1015,7 @@ {Hexagon::V6_vadd_qf32, Intrinsic::hexagon_V6_vadd_qf32, Intrinsic::hexagon_V6_vadd_qf32_128B}, {Hexagon::V6_vadd_qf32_mix, Intrinsic::hexagon_V6_vadd_qf32_mix, Intrinsic::hexagon_V6_vadd_qf32_mix_128B}, {Hexagon::V6_vadd_sf, Intrinsic::hexagon_V6_vadd_sf, Intrinsic::hexagon_V6_vadd_sf_128B}, +{Hexagon::V6_vadd_sf_bf, Intrinsic::hexagon_V6_vadd_sf_bf, Intrinsic::hexagon_V6_vadd_sf_bf_128B}, {Hexagon::V6_vadd_sf_hf, Intrinsic::hexagon_V6_vadd_sf_hf, Intrinsic::hexagon_V6_vadd_sf_hf_128B}, {Hexagon::V6_vadd_sf_sf, Intrinsic::hexagon_V6_vadd_sf_sf, Intrinsic::hexagon_V6_vadd_sf_sf_128B}, {Hexagon::V6_vaddb, Intrinsic::hexagon_V6_vaddb, Intrinsic::hexagon_V6_vaddb_128B}, @@ -1112,10 +1113,15 @@ {Hexagon::V6_vcl0h, Intrinsic::hexagon_V6_vcl0h, Intrinsic::hexagon_V6_vcl0h_128B}, {Hexagon::V6_vcl0w, Intrinsic::hexagon_V6_vcl0w, Intrinsic::hexagon_V6_vcl0w_128B}, {Hexagon::V6_vcombine, Intrinsic::hexagon_V6_vcombine, Intrinsic::hexagon_V6_vcombine_128B}, +{Hexagon::V6_vconv_h_hf, Intrinsic::hexagon_V6_vconv_h_hf, Intrinsic::hexagon_V6_vconv_h_hf_128B}, +{Hexagon::V6_vconv_hf_h, Intrinsic::hexagon_V6_vconv_hf_h, Intrinsic::hexagon_V6_vconv_hf_h_128B}, {Hexagon::V6_vconv_hf_qf16, Intrinsic::hexagon_V6_vconv_hf_qf16, Intrinsic::hexagon_V6_vconv_hf_qf16_128B}, {Hexagon::V6_vconv_hf_qf32, Intrinsic::hexagon_V6_vconv_hf_qf32, Intrinsic::hexagon_V6_vconv_hf_qf32_128B}, {Hexagon::V6_vconv_sf_qf32, Intrinsic::hexagon_V6_vconv_sf_qf32, Intrinsic::hexagon_V6_vconv_sf_qf32_128B}, +{Hexagon::V6_vconv_sf_w, Intrinsic::hexagon_V6_vconv_sf_w, Intrinsic::hexagon_V6_vconv_sf_w_128B}, +{Hexagon::V6_vconv_w_sf, Intrinsic::hexagon_V6_vconv_w_sf, Intrinsic::hexagon_V6_vconv_w_sf_128B}, {Hexagon::V6_vcvt_b_hf, Intrinsic::hexagon_V6_vcvt_b_hf, Intrinsic::hexagon_V6_vcvt_b_hf_128B}, +{Hexagon::V6_vcvt_bf_sf, Intrinsic::hexagon_V6_vcvt_bf_sf, Intrinsic::hexagon_V6_vcvt_bf_sf_128B}, {Hexagon::V6_vcvt_h_hf, Intrinsic::hexagon_V6_vcvt_h_hf, Intrinsic::hexagon_V6_vcvt_h_hf_128B}, {Hexagon::V6_vcvt_hf_b, Intrinsic::hexagon_V6_vcvt_hf_b, Intrinsic::hexagon_V6_vcvt_hf_b_128B}, {Hexagon::V6_vcvt_hf_h, Intrinsic::hexagon_V6_vcvt_hf_h, Intrinsic::hexagon_V6_vcvt_hf_h_128B}, @@ -1182,6 +1188,10 @@ {Hexagon::V6_vgtb_and, Intrinsic::hexagon_V6_vgtb_and, Intrinsic::hexagon_V6_vgtb_and_128B}, {Hexagon::V6_vgtb_or, Intrinsic::hexagon_V6_vgtb_or, Intrinsic::hexagon_V6_vgtb_or_128B}, {Hexagon::V6_vgtb_xor, Intrinsic::hexagon_V6_vgtb_xor, Intrinsic::hexagon_V6_vgtb_xor_128B}, +{Hexagon::V6_vgtbf, Intrinsic::hexagon_V6_vgtbf, Intrinsic::hexagon_V6_vgtbf_128B}, +{Hexagon::V6_vgtbf_and, Intrinsic::hexagon_V6_vgtbf_and, 
Intrinsic::hexagon_V6_vgtbf_and_128B}, +{Hexagon::V6_vgtbf_or, Intrinsic::hexagon_V6_vgtbf_or, Intrinsic::hexagon_V6_vgtbf_or_128B}, +{Hexagon::V6_vgtbf_xor, Intrinsic::hexagon_V6_vgtbf_xor, Intrinsic::hexagon_V6_vgtbf_xor_128B}, {Hexagon::V6_vgth, Intrinsic::hexagon_V6_vgth, Intrinsic::hexagon_V6_vgth_128B}, {Hexagon::V6_vgth_and, Intrinsic::hexagon_V6_vgth_and, Intrinsic::hexagon_V6_vgth_and_128B}, {Hexagon::V6_vgth_or, Intrinsic::hexagon_V6_vgth_or, Intrinsic::hexagon_V6_vgth_or_128B}, @@ -1229,6 +1239,7 @@ {Hexagon::V6_vlutvwh_oracc, Intrinsic::hexagon_V6_vlutvwh_oracc, Intrinsic::hexagon_V6_vlutvwh_oracc_128B}, {Hexagon::V6_vlutvwh_oracci, Intrinsic::hexagon_V6_vlutvwh_oracci, Intrinsic::hexagon_V6_vlutvwh_oracci_128B}, {Hexagon::V6_vlutvwhi, Intrinsic::hexagon_V6_vlutvwhi, Intrinsic::hexagon_V6_vlutvwhi_128B}, +{Hexagon::V6_vmax_bf, Intrinsic::hexagon_V6_vmax_bf, Intrinsic::hexagon_V6_vmax_bf_128B}, {Hexagon::V6_vmax_hf, Intrinsic::hexagon_V6_vmax_hf, Intrinsic::hexagon_V6_vmax_hf_128B}, {Hexagon::V6_vmax_sf, Intrinsic::hexagon_V6_vmax_sf, Intrinsic::hexagon_V6_vmax_sf_128B}, {Hexagon::V6_vmaxb, Intrinsic::hexagon_V6_vmaxb, Intrinsic::hexagon_V6_vmaxb_128B}, @@ -1236,6 +1247,7 @@ {Hexagon::V6_vmaxub, Intrinsic::hexagon_V6_vmaxub, Intrinsic::hexagon_V6_vmaxub_128B}, {Hexagon::V6_vmaxuh, Intrinsic::hexagon_V6_vmaxuh, Intrinsic::hexagon_V6_vmaxuh_128B}, {Hexagon::V6_vmaxw, Intrinsic::hexagon_V6_vmaxw, Intrinsic::hexagon_V6_vmaxw_128B}, +{Hexagon::V6_vmin_bf, Intrinsic::hexagon_V6_vmin_bf, Intrinsic::hexagon_V6_vmin_bf_128B}, {Hexagon::V6_vmin_hf, Intrinsic::hexagon_V6_vmin_hf, Intrinsic::hexagon_V6_vmin_hf_128B}, {Hexagon::V6_vmin_sf, Intrinsic::hexagon_V6_vmin_sf, Intrinsic::hexagon_V6_vmin_sf_128B}, {Hexagon::V6_vminb, Intrinsic::hexagon_V6_vminb, Intrinsic::hexagon_V6_vminb_128B}, @@ -1266,6 +1278,8 @@ {Hexagon::V6_vmpy_qf32_mix_hf, Intrinsic::hexagon_V6_vmpy_qf32_mix_hf, Intrinsic::hexagon_V6_vmpy_qf32_mix_hf_128B}, {Hexagon::V6_vmpy_qf32_qf16, Intrinsic::hexagon_V6_vmpy_qf32_qf16, Intrinsic::hexagon_V6_vmpy_qf32_qf16_128B}, {Hexagon::V6_vmpy_qf32_sf, Intrinsic::hexagon_V6_vmpy_qf32_sf, Intrinsic::hexagon_V6_vmpy_qf32_sf_128B}, +{Hexagon::V6_vmpy_sf_bf, Intrinsic::hexagon_V6_vmpy_sf_bf, Intrinsic::hexagon_V6_vmpy_sf_bf_128B}, +{Hexagon::V6_vmpy_sf_bf_acc, Intrinsic::hexagon_V6_vmpy_sf_bf_acc, Intrinsic::hexagon_V6_vmpy_sf_bf_acc_128B}, {Hexagon::V6_vmpy_sf_hf, Intrinsic::hexagon_V6_vmpy_sf_hf, Intrinsic::hexagon_V6_vmpy_sf_hf_128B}, {Hexagon::V6_vmpy_sf_hf_acc, Intrinsic::hexagon_V6_vmpy_sf_hf_acc, Intrinsic::hexagon_V6_vmpy_sf_hf_acc_128B}, {Hexagon::V6_vmpy_sf_sf, Intrinsic::hexagon_V6_vmpy_sf_sf, Intrinsic::hexagon_V6_vmpy_sf_sf_128B}, @@ -1399,6 +1413,7 @@ {Hexagon::V6_vsub_qf32, Intrinsic::hexagon_V6_vsub_qf32, Intrinsic::hexagon_V6_vsub_qf32_128B}, {Hexagon::V6_vsub_qf32_mix, Intrinsic::hexagon_V6_vsub_qf32_mix, Intrinsic::hexagon_V6_vsub_qf32_mix_128B}, {Hexagon::V6_vsub_sf, Intrinsic::hexagon_V6_vsub_sf, Intrinsic::hexagon_V6_vsub_sf_128B}, +{Hexagon::V6_vsub_sf_bf, Intrinsic::hexagon_V6_vsub_sf_bf, Intrinsic::hexagon_V6_vsub_sf_bf_128B}, {Hexagon::V6_vsub_sf_hf, Intrinsic::hexagon_V6_vsub_sf_hf, Intrinsic::hexagon_V6_vsub_sf_hf_128B}, {Hexagon::V6_vsub_sf_sf, Intrinsic::hexagon_V6_vsub_sf_sf, Intrinsic::hexagon_V6_vsub_sf_sf_128B}, {Hexagon::V6_vsubb, Intrinsic::hexagon_V6_vsubb, Intrinsic::hexagon_V6_vsubb_128B}, Index: llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td =================================================================== --- 
llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -3677,3 +3677,66 @@ (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV69, UseHVX64B]>; def: Pat<(int_hexagon_V6_vmpyuhvs_128B HvxVR:$src1, HvxVR:$src2), (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV69, UseHVX128B]>; + +// V73 HVX Instructions. + +def: Pat<(int_hexagon_V6_vadd_sf_bf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vadd_sf_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_h_hf HvxVR:$src1), + (V6_vconv_h_hf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_h_hf_128B HvxVR:$src1), + (V6_vconv_h_hf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_hf_h HvxVR:$src1), + (V6_vconv_hf_h HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_hf_h_128B HvxVR:$src1), + (V6_vconv_hf_h HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_sf_w HvxVR:$src1), + (V6_vconv_sf_w HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_sf_w_128B HvxVR:$src1), + (V6_vconv_sf_w HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_w_sf HvxVR:$src1), + (V6_vconv_w_sf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vconv_w_sf_128B HvxVR:$src1), + (V6_vconv_w_sf HvxVR:$src1)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_bf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_bf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_bf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_bf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vgtbf HvxVR:$src1, HvxVR:$src2), + (V6_vgtbf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vgtbf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtbf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtbf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_bf HvxVR:$src1, HvxVR:$src2), + (V6_vmax_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmax_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmax_bf HvxVR:$src1, 
HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmin_bf HvxVR:$src1, HvxVR:$src2), + (V6_vmin_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmin_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmin_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_bf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_bf_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_bf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[UseHVXV73, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_sf_bf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_sf_bf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_bf HvxVR:$src1, HvxVR:$src2)>, Requires<[UseHVXV73, UseHVX128B]>; Index: llvm/lib/Target/Hexagon/HexagonDepMappings.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepMappings.td +++ llvm/lib/Target/Hexagon/HexagonDepMappings.td @@ -165,6 +165,8 @@ def V6_MAP_equw_andAlias : InstAlias<"$Qx4 &= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_and HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; def V6_MAP_equw_iorAlias : InstAlias<"$Qx4 |= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_or HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; def V6_MAP_equw_xorAlias : InstAlias<"$Qx4 ^= vcmp.eq($Vu32.uw,$Vv32.uw)", (V6_veqw_xor HvxQR:$Qx4, HvxVR:$Vu32, HvxVR:$Vv32)>, Requires<[UseHVX]>; +def V6_dbl_ld0Alias : InstAlias<"$Vdd32 = vmem($Rt32)", (V6_vL64b_ai HvxWR:$Vdd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def V6_dbl_st0Alias : InstAlias<"vmem($Rt32) = $Vss32", (V6_vS64b_ai IntRegs:$Rt32, 0, HvxWR:$Vss32)>, Requires<[UseHVX]>; def V6_extractw_altAlias : InstAlias<"$Rd32.w = vextract($Vu32,$Rs32)", (V6_extractw IntRegs:$Rd32, HvxVR:$Vu32, IntRegs:$Rs32)>, Requires<[UseHVX]>; def V6_ld0Alias : InstAlias<"$Vd32 = vmem($Rt32)", (V6_vL32b_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_ldcnp0Alias : InstAlias<"if (!$Pv4) $Vd32.cur = vmem($Rt32)", (V6_vL32b_cur_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; @@ -471,4 +473,5 @@ def V6_vzh_altAlias : InstAlias<"$Vdd32 = vzxth($Vu32)", (V6_vzh HvxWR:$Vdd32, HvxVR:$Vu32)>, Requires<[UseHVX]>; def V6_zld0Alias : InstAlias<"z = vmem($Rt32)", (V6_zLd_ai IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_zldp0Alias : InstAlias<"if ($Pv4) z = vmem($Rt32)", (V6_zLd_pred_ai PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; +def Y2_crswap_oldAlias : InstAlias<"crswap($Rx32,sgp)", (Y2_crswap0 IntRegs:$Rx32)>; def Y2_dcfetchAlias : InstAlias<"dcfetch($Rs32)", (Y2_dcfetchbo IntRegs:$Rs32, 0)>; Index: llvm/lib/Target/Hexagon/HexagonDepMask.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepMask.h +++ llvm/lib/Target/Hexagon/HexagonDepMask.h @@ -8,7 +8,6 @@ // Automatically generated file, do not edit! 
//===----------------------------------------------------------------------===// - #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H @@ -2817,4 +2816,4 @@ 0 } }; -#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPMASK_H \ No newline at end of file Index: llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h +++ llvm/lib/Target/Hexagon/HexagonDepTimingClasses.h @@ -8,7 +8,6 @@ // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// - #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H @@ -53,9 +52,11 @@ case Hexagon::Sched::tc_c57d9f39: case Hexagon::Sched::tc_d33e5eee: case Hexagon::Sched::tc_decdde8a: + case Hexagon::Sched::tc_e60def48: case Hexagon::Sched::tc_ed03645c: case Hexagon::Sched::tc_eeda4109: case Hexagon::Sched::tc_ef921005: + case Hexagon::Sched::tc_f97707c1: case Hexagon::Sched::tc_f999c66e: return true; default: @@ -97,6 +98,7 @@ inline bool is_TC2early(unsigned SchedClass) { switch (SchedClass) { + case Hexagon::Sched::tc_33e7e673: case Hexagon::Sched::tc_45f9d1be: case Hexagon::Sched::tc_a4ee89db: return true; @@ -110,6 +112,7 @@ case Hexagon::Sched::tc_01e1be3b: case Hexagon::Sched::tc_1248597c: case Hexagon::Sched::tc_197dce51: + case Hexagon::Sched::tc_27106296: case Hexagon::Sched::tc_28e55c6f: case Hexagon::Sched::tc_2c3e17fc: case Hexagon::Sched::tc_38382228: @@ -121,6 +124,7 @@ case Hexagon::Sched::tc_6ae3426b: case Hexagon::Sched::tc_6d861a95: case Hexagon::Sched::tc_788b1d09: + case Hexagon::Sched::tc_7dc63b5c: case Hexagon::Sched::tc_7f8ae742: case Hexagon::Sched::tc_9406230a: case Hexagon::Sched::tc_a154b476: @@ -153,4 +157,4 @@ } } // namespace llvm -#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H +#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPTIMINGCLASSES_H \ No newline at end of file Index: llvm/lib/Target/Hexagon/HexagonInstrFormats.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -166,6 +166,15 @@ bit isCVI = 0; let TSFlags{59} = isCVI; + bit isHVXALU = 0; + let TSFlags{60} = isHVXALU; + + bit isHVXALU2SRC = 0; + let TSFlags{61} = isHVXALU2SRC; + + bit hasUnaryRestriction = 0; + let TSFlags{62} = hasUnaryRestriction; + // Fields used for relation models. bit isNonTemporal = 0; string isNT = ""; // set to "true" for non-temporal vector stores. 
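Note on the TSFlags bits added above: isHVXALU, isHVXALU2SRC, and hasUnaryRestriction occupy TSFlags bits 60-62 and pair with the Pos/Mask enum values added to HexagonBaseInfo.h later in this patch. As a minimal sketch of how such a flag is typically queried, assuming the usual HexagonII shift-and-mask idiom (the isHVXALU helper below is illustrative only, not part of this patch):

  #include <cstdint>

  namespace HexagonII {
  // Mirrors the enum values added to HexagonBaseInfo.h by this patch.
  enum {
    isHVXALUPos = 60,
    isHVXALUMask = 0x1,
  };
  } // namespace HexagonII

  // Shift the packed TSFlags word down to the bit position, then mask off
  // the single property bit.
  inline bool isHVXALU(uint64_t TSFlags) {
    return (TSFlags >> HexagonII::isHVXALUPos) & HexagonII::isHVXALUMask;
  }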
Index: llvm/lib/Target/Hexagon/HexagonOperands.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonOperands.td +++ llvm/lib/Target/Hexagon/HexagonOperands.td @@ -23,10 +23,13 @@ int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); }]>; + def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm"; let RenderMethod = "addImmOperands"; } def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; } def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; } def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; } +def sgp10ConstOperand : AsmOperandClass { let Name = "sgp10Const"; } +def sgp10Const : Operand<i32> { let ParserMatchClass = sgp10ConstOperand; } def bblabel : Operand<OtherVT>; def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; Index: llvm/lib/Target/Hexagon/HexagonSchedule.td =================================================================== --- llvm/lib/Target/Hexagon/HexagonSchedule.td +++ llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -70,3 +70,6 @@ include "HexagonScheduleV67T.td" include "HexagonScheduleV68.td" include "HexagonScheduleV69.td" +include "HexagonScheduleV71.td" +include "HexagonScheduleV71T.td" +include "HexagonScheduleV73.td" Index: llvm/lib/Target/Hexagon/HexagonScheduleV71.td =================================================================== --- /dev/null +++ llvm/lib/Target/Hexagon/HexagonScheduleV71.td @@ -0,0 +1,38 @@ +//=-HexagonScheduleV71.td - HexagonV71 Scheduling Definitions *- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// ScalarItin and HVXItin contain some old itineraries still used by a handful +// of instructions. Hopefully, we will be able to get rid of them soon. +def HexagonV71ItinList : DepScalarItinV71, ScalarItin, + DepHVXItinV71, HVXItin, PseudoItin { + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV71_list, ScalarItin_list, + DepHVXItinV71_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV71 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV71ItinList.ItinList>; + +def HexagonModelV71 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV71; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V71 Resource Definitions - +//===----------------------------------------------------------------------===// + Index: llvm/lib/Target/Hexagon/HexagonScheduleV71T.td =================================================================== --- /dev/null +++ llvm/lib/Target/Hexagon/HexagonScheduleV71T.td @@ -0,0 +1,58 @@ +//=-HexagonScheduleV71T.td - Hexagon V71 Tiny Core Scheduling Definition ----=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// +//===----------------------------------------------------------------------===// + +class HexagonV71TPseudoItin { + list<InstrItinData> V71TPseudoItin_list = [ + InstrItinData<PSEUDO, [InstrStage<1, [SLOT0, SLOT2, SLOT3]>], [2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>, + InstrStage<1, [SLOT2, SLOT3]>], + [2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData<DUPLEX, [InstrStage<1, [SLOT0]>], + [2, 1, 1]>, + InstrItinData<tc_ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>], [2]> + ]; +} + +// +// HVXItin contains some old itineraries still used by a handful of +// instructions. Hopefully, we will be able to get rid of them soon. +def HexagonV71TItinList : DepScalarItinV71T, DepHVXItinV71, HVXItin, + HexagonV71TPseudoItin { + list<InstrItinData> V71TItin_list = [ + InstrItinData<LD_tc_ld_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>], + [3, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData<ST_tc_st_SLOT01, [InstrStage<1, [SLOT0, SLOT1]>], + [1, 1, 3, 3], + [Hex_FWD, Hex_FWD]> + ]; + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV71T_list, V71TItin_list, DepHVXItinV71_list, + HVXItin_list, V71TPseudoItin_list); +} + +def HexagonItinerariesV71T : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV71TItinList.ItinList>; + +def HexagonModelV71T : SchedMachineModel { + let IssueWidth = 3; + let Itineraries = HexagonItinerariesV71T; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V71 Tiny Core Resource Definitions - +//===----------------------------------------------------------------------===// Index: llvm/lib/Target/Hexagon/HexagonScheduleV73.td =================================================================== --- /dev/null +++ llvm/lib/Target/Hexagon/HexagonScheduleV73.td @@ -0,0 +1,38 @@ +//=-HexagonScheduleV73.td - HexagonV73 Scheduling Definitions *- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// +// ScalarItin and HVXItin contain some old itineraries still used by a handful +// of instructions. Hopefully, we will be able to get rid of them soon. +def HexagonV73ItinList : DepScalarItinV73, ScalarItin, + DepHVXItinV73, HVXItin, PseudoItin { + list<InstrItinData> ItinList = + !listconcat(DepScalarItinV73_list, ScalarItin_list, + DepHVXItinV73_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV73 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV73ItinList.ItinList>; + +def HexagonModelV73 : SchedMachineModel { + // Max issue per cycle == bundle width.
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV73; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V73 Resource Definitions - +//===----------------------------------------------------------------------===// + Index: llvm/lib/Target/Hexagon/HexagonSubtarget.h =================================================================== --- llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -95,11 +95,11 @@ enum HexagonProcFamilyEnum { Others, TinyCore }; std::string CPUString; + HexagonProcFamilyEnum HexagonProcFamily = Others; Triple TargetTriple; // The following objects can use the TargetTriple, so they must be // declared after it. - HexagonProcFamilyEnum HexagonProcFamily = Others; HexagonInstrInfo InstrInfo; HexagonRegisterInfo RegInfo; HexagonTargetLowering TLInfo; @@ -198,6 +198,18 @@ bool hasV69OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V69; } + bool hasV71Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V71; + } + bool hasV71OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V71; + } + bool hasV73Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V73; + } + bool hasV73OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V73; + } bool useAudioOps() const { return UseAudioOps; } bool useCompound() const { return UseCompound; } @@ -243,6 +255,12 @@ bool useHVXV69Ops() const { return HexagonHVXVersion >= Hexagon::ArchEnum::V69; } + bool useHVXV71Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V71; + } + bool useHVXV73Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V73; + } bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; } bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; } Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -162,6 +162,15 @@ isCVIPos = 59, isCVIMask = 0x1, + + isHVXALUPos = 60, + isHVXALUMask = 0x1, + + isHVXALU2SRCPos = 61, + isHVXALU2SRCMask = 0x1, + + hasUnaryRestrictionPos = 62, + hasUnaryRestrictionMask = 0x1, }; // *** The code above must match HexagonInstrFormat*.td *** // Index: llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp =================================================================== --- llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -84,24 +84,30 @@ cl::init(false)); cl::opt<bool> MV69("mv69", cl::Hidden, cl::desc("Build for Hexagon V69"), cl::init(false)); - -cl::opt<Hexagon::ArchEnum> - EnableHVX("mhvx", - cl::desc("Enable Hexagon Vector eXtensions"), - cl::values( - clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"), - clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"), - clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"), - clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"), - clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"), - clEnumValN(Hexagon::ArchEnum::V68, "v68", "Build for HVX v68"), - clEnumValN(Hexagon::ArchEnum::V69, "v69", "Build for HVX v69"), - // Sentinel for no value specified. - clEnumValN(Hexagon::ArchEnum::Generic, "", "")), - // Sentinel for flag not present.
- cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional); +cl::opt<bool> MV71("mv71", cl::Hidden, cl::desc("Build for Hexagon V71"), + cl::init(false)); +cl::opt<bool> MV71T("mv71t", cl::Hidden, cl::desc("Build for Hexagon V71T"), + cl::init(false)); +cl::opt<bool> MV73("mv73", cl::Hidden, cl::desc("Build for Hexagon V73"), + cl::init(false)); } // namespace +cl::opt<Hexagon::ArchEnum> EnableHVX( + "mhvx", cl::desc("Enable Hexagon Vector eXtensions"), + cl::values(clEnumValN(Hexagon::ArchEnum::V60, "v60", "Build for HVX v60"), + clEnumValN(Hexagon::ArchEnum::V62, "v62", "Build for HVX v62"), + clEnumValN(Hexagon::ArchEnum::V65, "v65", "Build for HVX v65"), + clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"), + clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"), + clEnumValN(Hexagon::ArchEnum::V68, "v68", "Build for HVX v68"), + clEnumValN(Hexagon::ArchEnum::V69, "v69", "Build for HVX v69"), + clEnumValN(Hexagon::ArchEnum::V71, "v71", "Build for HVX v71"), + clEnumValN(Hexagon::ArchEnum::V73, "v73", "Build for HVX v73"), + // Sentinel for no value specified. + clEnumValN(Hexagon::ArchEnum::Generic, "", "")), + // Sentinel for flag not present. + cl::init(Hexagon::ArchEnum::NoArch), cl::ValueOptional); + static cl::opt<bool> DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions")); @@ -135,6 +141,12 @@ return "hexagonv68"; if (MV69) return "hexagonv69"; + if (MV71) + return "hexagonv71"; + if (MV71T) + return "hexagonv71t"; + if (MV73) + return "hexagonv73"; return ""; } @@ -143,10 +155,10 @@ if (!ArchV.empty() && !CPU.empty()) { // Tiny cores have a "t" suffix that is discarded when creating a secondary // non-tiny subtarget. See: addArchSubtarget - std::pair<StringRef, StringRef> ArchP = ArchV.split('t'); - std::pair<StringRef, StringRef> CPUP = CPU.split('t'); + std::pair<StringRef, StringRef> ArchP = ArchV.split('t'); + std::pair<StringRef, StringRef> CPUP = CPU.split('t'); if (!ArchP.first.equals(CPUP.first)) - report_fatal_error("conflicting architectures specified."); + report_fatal_error("conflicting architectures specified."); return CPU; } if (ArchV.empty()) { @@ -391,6 +403,12 @@ case Hexagon::ArchEnum::V69: Result.push_back("+hvxv69"); break; + case Hexagon::ArchEnum::V71: + Result.push_back("+hvxv71"); + break; + case Hexagon::ArchEnum::V73: + Result.push_back("+hvxv73"); + break; case Hexagon::ArchEnum::Generic:{ Result.push_back(StringSwitch<StringRef>(CPU) .Case("hexagonv60", "+hvxv60") .Case("hexagonv62", "+hvxv62") .Case("hexagonv65", "+hvxv65") .Case("hexagonv66", "+hvxv66") .Case("hexagonv67", "+hvxv67") .Case("hexagonv67t", "+hvxv67") .Case("hexagonv68", "+hvxv68") - .Case("hexagonv69", "+hvxv69")); + .Case("hexagonv69", "+hvxv69") + .Case("hexagonv71", "+hvxv71") + .Case("hexagonv71t", "+hvxv71") + .Case("hexagonv73", "+hvxv73")); break; } case Hexagon::ArchEnum::NoArch: @@ -448,8 +469,8 @@ // turns on hvxvNN, corresponding to the existing ArchVNN. FeatureBitset FB = S; unsigned CpuArch = ArchV5; - for (unsigned F : {ArchV69, ArchV68, ArchV67, ArchV66, ArchV65, ArchV62, - ArchV60, ArchV55, ArchV5}) { + for (unsigned F : {ArchV73, ArchV71, ArchV69, ArchV68, ArchV67, ArchV66, + ArchV65, ArchV62, ArchV60, ArchV55, ArchV5}) { if (!FB.test(F)) continue; CpuArch = F; @@ -465,7 +486,7 @@ bool HasHvxVer = false; for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68, - ExtensionHVXV69}) { + ExtensionHVXV69, ExtensionHVXV71, ExtensionHVXV73}) { if (!FB.test(F)) continue; HasHvxVer = true; @@ -478,27 +499,33 @@ // HasHvxVer is false, and UseHvx is true.
switch (CpuArch) { + case ArchV73: + FB.set(ExtensionHVXV73); + [[fallthrough]]; + case ArchV71: + FB.set(ExtensionHVXV71); + [[fallthrough]]; case ArchV69: FB.set(ExtensionHVXV69); [[fallthrough]]; - case ArchV68: - FB.set(ExtensionHVXV68); - [[fallthrough]]; - case ArchV67: - FB.set(ExtensionHVXV67); - [[fallthrough]]; - case ArchV66: - FB.set(ExtensionHVXV66); - [[fallthrough]]; - case ArchV65: - FB.set(ExtensionHVXV65); - [[fallthrough]]; - case ArchV62: - FB.set(ExtensionHVXV62); - [[fallthrough]]; - case ArchV60: - FB.set(ExtensionHVXV60); - break; + case ArchV68: + FB.set(ExtensionHVXV68); + [[fallthrough]]; + case ArchV67: + FB.set(ExtensionHVXV67); + [[fallthrough]]; + case ArchV66: + FB.set(ExtensionHVXV66); + [[fallthrough]]; + case ArchV65: + FB.set(ExtensionHVXV65); + [[fallthrough]]; + case ArchV62: + FB.set(ExtensionHVXV62); + [[fallthrough]]; + case ArchV60: + FB.set(ExtensionHVXV60); + break; } return FB; } @@ -512,11 +539,11 @@ MCSubtargetInfo *X = createHexagonMCSubtargetInfoImpl( TT, CPUName, /*TuneCPU*/ CPUName, ArchFS); - if (X != nullptr && (CPUName == "hexagonv67t")) + if (X != nullptr && (CPUName == "hexagonv67t" || CPUName == "hexagonv71t")) addArchSubtarget(X, ArchFS); if (CPU.equals("help")) - exit(0); + exit(0); if (!isCPUValid(CPUName.str())) { errs() << "error: invalid CPU \"" << CPUName.str().c_str() @@ -552,8 +579,7 @@ return X; } -void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, - StringRef FS) { +void Hexagon_MC::addArchSubtarget(MCSubtargetInfo const *STI, StringRef FS) { assert(STI != nullptr); if (STI->getCPU().contains("t")) { auto ArchSTI = createHexagonMCSubtargetInfo( @@ -577,7 +603,10 @@ .Case("hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67) .Case("hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T) .Case("hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68) - .Case("hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69); + .Case("hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69) + .Case("hexagonv71", llvm::ELF::EF_HEXAGON_MACH_V71) + .Case("hexagonv71t", llvm::ELF::EF_HEXAGON_MACH_V71T) + .Case("hexagonv73", llvm::ELF::EF_HEXAGON_MACH_V73); } llvm::ArrayRef<MCPhysReg> Hexagon_MC::GetVectRegRev() { @@ -606,12 +635,12 @@ return false; //assert(!HexagonMCInstrInfo::isBundle(Inst)); - if(!HexagonMCInstrInfo::isExtendable(*Info, Inst)) + if (!HexagonMCInstrInfo::isExtendable(*Info, Inst)) return false; auto const &Extended(HexagonMCInstrInfo::getExtendableOperand(*Info, Inst)); assert(Extended.isExpr()); int64_t Value; - if(!Extended.getExpr()->evaluateAsAbsolute(Value)) + if (!Extended.getExpr()->evaluateAsAbsolute(Value)) return false; Target = Value; return true; @@ -637,8 +666,8 @@ createHexagonMCRegisterInfo); // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(getTheHexagonTarget(), - Hexagon_MC::createHexagonMCSubtargetInfo); + TargetRegistry::RegisterMCSubtargetInfo( + getTheHexagonTarget(), Hexagon_MC::createHexagonMCSubtargetInfo); // Register the MC Code Emitter TargetRegistry::RegisterMCCodeEmitter(getTheHexagonTarget(), @@ -648,18 +677,16 @@ TargetRegistry::RegisterMCAsmBackend(getTheHexagonTarget(), createHexagonAsmBackend); - // Register the MC instruction analyzer.
TargetRegistry::RegisterMCInstrAnalysis(getTheHexagonTarget(), createHexagonMCInstrAnalysis); // Register the obj streamer - TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), - createMCStreamer); + TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), createMCStreamer); // Register the obj target streamer - TargetRegistry::RegisterObjectTargetStreamer(getTheHexagonTarget(), - createHexagonObjectTargetStreamer); + TargetRegistry::RegisterObjectTargetStreamer( + getTheHexagonTarget(), createHexagonObjectTargetStreamer); // Register the asm streamer TargetRegistry::RegisterAsmTargetStreamer(getTheHexagonTarget(), Index: llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll +++ llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll @@ -2,73 +2,79 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=1024 %s | FileCheck -check-prefixes=OPT,MAX1024 %s ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-intrinsics -amdgpu-mem-intrinsic-expand-size=-1 %s | FileCheck -check-prefixes=OPT,ALL %s -declare void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1 -declare void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1 -declare void @llvm.memcpy.p3i8.p1i8.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #1 -declare void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1 -declare void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(1)* nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture readonly, i32, i1) #1 +declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1 -declare void @llvm.memmove.p1i8.p3i8.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p5i8.p5i8.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1 +declare void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture readonly, i64, i1) #1 +declare void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1) #1 +declare void @llvm.memmove.p5i8.p5i8.i32(i8 addrspace(5)* nocapture, i8 addrspace(5)* nocapture readonly, i32, i1) #1 -declare void @llvm.memset.p1i8.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1 +declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture, i8, i64, i1) #1 ; Test the upper bound for sizes to leave -define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @max_size_small_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; MAX1024-LABEL: 
@max_size_small_static_memcpy_caller0( -; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 1024, i1 false) +; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 addrspace(1)* [[SRC:%.*]], i64 1024, i1 false) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @max_size_small_static_memcpy_caller0( +; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; ALL-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* ; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; ALL: load-store-loop: -; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1 -; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; ALL-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 -; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64 -; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; ALL-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 1 +; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; ALL-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 1 +; ALL-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; ALL-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64 +; ALL-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; ALL: memcpy-split: ; ALL-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } ; Smallest static size which will be expanded -define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @min_size_large_static_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @min_size_large_static_memcpy_caller0( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] 
], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 1 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 1 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1024 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 1 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1024 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 1 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1024 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1024 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 1 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } -define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @max_size_small_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; MAX1024-LABEL: @max_size_small_static_memmove_caller0( -; MAX1024-NEXT: call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 1024, i1 false) +; MAX1024-NEXT: call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 addrspace(1)* [[SRC:%.*]], i64 1024, i1 false) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @max_size_small_static_memmove_caller0( -; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]] +; ALL-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult i8 addrspace(1)* [[SRC:%.*]], [[DST:%.*]] ; ALL-NEXT: [[COMPARE_N_TO_0:%.*]] = icmp eq i64 1024, 0 ; ALL-NEXT: br i1 [[COMPARE_SRC_DST]], label [[COPY_BACKWARDS:%.*]], label [[COPY_FORWARD:%.*]] ; ALL: copy_backwards: @@ -76,33 +82,33 @@ ; ALL: copy_backwards_loop: ; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ [[INDEX_PTR:%.*]], [[COPY_BACKWARDS_LOOP]] ], [ 1024, [[COPY_BACKWARDS]] ] ; ALL-NEXT: [[INDEX_PTR]] = sub i64 [[TMP1]], 1 -; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR]] -; ALL-NEXT: [[ELEMENT:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 1 -; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR]] -; ALL-NEXT: store i8 [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 +; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR]] +; ALL-NEXT: [[ELEMENT:%.*]] = load i8, i8 addrspace(1)* [[TMP2]], align 1 +; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR]] +; ALL-NEXT: store i8 
[[ELEMENT]], i8 addrspace(1)* [[TMP3]], align 1 ; ALL-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_PTR]], 0 ; ALL-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE]], label [[COPY_BACKWARDS_LOOP]] ; ALL: copy_forward: ; ALL-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP:%.*]] ; ALL: copy_forward_loop: ; ALL-NEXT: [[INDEX_PTR1:%.*]] = phi i64 [ [[INDEX_INCREMENT:%.*]], [[COPY_FORWARD_LOOP]] ], [ 0, [[COPY_FORWARD]] ] -; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR1]] -; ALL-NEXT: [[ELEMENT2:%.*]] = load i8, ptr addrspace(1) [[TMP5]], align 1 -; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR1]] -; ALL-NEXT: store i8 [[ELEMENT2]], ptr addrspace(1) [[TMP6]], align 1 +; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR1]] +; ALL-NEXT: [[ELEMENT2:%.*]] = load i8, i8 addrspace(1)* [[TMP5]], align 1 +; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR1]] +; ALL-NEXT: store i8 [[ELEMENT2]], i8 addrspace(1)* [[TMP6]], align 1 ; ALL-NEXT: [[INDEX_INCREMENT]] = add i64 [[INDEX_PTR1]], 1 ; ALL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_INCREMENT]], 1024 ; ALL-NEXT: br i1 [[TMP7]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP]] ; ALL: memmove_done: ; ALL-NEXT: ret void ; - call void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1024, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1024, i1 false) ret void } -define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @min_size_large_static_memmove_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @min_size_large_static_memmove_caller0( -; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]] +; OPT-NEXT: [[COMPARE_SRC_DST:%.*]] = icmp ult i8 addrspace(1)* [[SRC:%.*]], [[DST:%.*]] ; OPT-NEXT: [[COMPARE_N_TO_0:%.*]] = icmp eq i64 1025, 0 ; OPT-NEXT: br i1 [[COMPARE_SRC_DST]], label [[COPY_BACKWARDS:%.*]], label [[COPY_FORWARD:%.*]] ; OPT: copy_backwards: @@ -110,1200 +116,1376 @@ ; OPT: copy_backwards_loop: ; OPT-NEXT: [[TMP1:%.*]] = phi i64 [ [[INDEX_PTR:%.*]], [[COPY_BACKWARDS_LOOP]] ], [ 1025, [[COPY_BACKWARDS]] ] ; OPT-NEXT: [[INDEX_PTR]] = sub i64 [[TMP1]], 1 -; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR]] -; OPT-NEXT: [[ELEMENT:%.*]] = load i8, ptr addrspace(1) [[TMP2]], align 1 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR]] -; OPT-NEXT: store i8 [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1 +; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR]] +; OPT-NEXT: [[ELEMENT:%.*]] = load i8, i8 addrspace(1)* [[TMP2]], align 1 +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR]] +; OPT-NEXT: store i8 [[ELEMENT]], i8 addrspace(1)* [[TMP3]], align 1 ; OPT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_PTR]], 0 ; OPT-NEXT: br i1 [[TMP4]], label [[MEMMOVE_DONE]], label [[COPY_BACKWARDS_LOOP]] ; OPT: copy_forward: ; OPT-NEXT: br i1 [[COMPARE_N_TO_0]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP:%.*]] ; OPT: copy_forward_loop: ; OPT-NEXT: [[INDEX_PTR1:%.*]] = phi i64 [ [[INDEX_INCREMENT:%.*]], [[COPY_FORWARD_LOOP]] ], [ 0, [[COPY_FORWARD]] ] -; 
OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[INDEX_PTR1]] -; OPT-NEXT: [[ELEMENT2:%.*]] = load i8, ptr addrspace(1) [[TMP5]], align 1 -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[INDEX_PTR1]] -; OPT-NEXT: store i8 [[ELEMENT2]], ptr addrspace(1) [[TMP6]], align 1 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC]], i64 [[INDEX_PTR1]] +; OPT-NEXT: [[ELEMENT2:%.*]] = load i8, i8 addrspace(1)* [[TMP5]], align 1 +; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST]], i64 [[INDEX_PTR1]] +; OPT-NEXT: store i8 [[ELEMENT2]], i8 addrspace(1)* [[TMP6]], align 1 ; OPT-NEXT: [[INDEX_INCREMENT]] = add i64 [[INDEX_PTR1]], 1 ; OPT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_INCREMENT]], 1025 ; OPT-NEXT: br i1 [[TMP7]], label [[MEMMOVE_DONE]], label [[COPY_FORWARD_LOOP]] ; OPT: memmove_done: ; OPT-NEXT: ret void ; - call void @llvm.memmove.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1025, i1 false) + call void @llvm.memmove.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 1025, i1 false) ret void } -define amdgpu_kernel void @max_size_small_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) #0 { +define amdgpu_kernel void @max_size_small_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { ; MAX1024-LABEL: @max_size_small_static_memset_caller0( -; MAX1024-NEXT: call void @llvm.memset.p1.i64(ptr addrspace(1) [[DST:%.*]], i8 [[VAL:%.*]], i64 1024, i1 false) +; MAX1024-NEXT: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* [[DST:%.*]], i8 [[VAL:%.*]], i64 1024, i1 false) ; MAX1024-NEXT: ret void ; ; ALL-LABEL: @max_size_small_static_memset_caller0( ; ALL-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]] ; ALL: loadstoreloop: ; ALL-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ] -; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]] -; ALL-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1 +; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]] +; ALL-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]], align 1 ; ALL-NEXT: [[TMP3]] = add i64 [[TMP1]], 1 ; ALL-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1024 ; ALL-NEXT: br i1 [[TMP4]], label [[LOADSTORELOOP]], label [[SPLIT]] ; ALL: split: ; ALL-NEXT: ret void ; - call void @llvm.memset.p1i8.i64(ptr addrspace(1) %dst, i8 %val, i64 1024, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1024, i1 false) ret void } -define amdgpu_kernel void @min_size_large_static_memset_caller0(ptr addrspace(1) %dst, i8 %val) #0 { +define amdgpu_kernel void @min_size_large_static_memset_caller0(i8 addrspace(1)* %dst, i8 %val) #0 { ; OPT-LABEL: @min_size_large_static_memset_caller0( ; OPT-NEXT: br i1 false, label [[SPLIT:%.*]], label [[LOADSTORELOOP:%.*]] ; OPT: loadstoreloop: ; OPT-NEXT: [[TMP1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], [[LOADSTORELOOP]] ] -; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[TMP1]] -; OPT-NEXT: store i8 [[VAL:%.*]], ptr addrspace(1) [[TMP2]], align 1 +; OPT-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 [[TMP1]] +; OPT-NEXT: store i8 [[VAL:%.*]], i8 addrspace(1)* [[TMP2]], align 1 ; OPT-NEXT: [[TMP3]] = add i64 [[TMP1]], 1 ; OPT-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1025 ; OPT-NEXT: br i1 [[TMP4]], label 
[[LOADSTORELOOP]], label [[SPLIT]] ; OPT: split: ; OPT-NEXT: ret void ; - call void @llvm.memset.p1i8.i64(ptr addrspace(1) %dst, i8 %val, i64 1025, i1 false) + call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 1025, i1 false) ret void } -define amdgpu_kernel void @variable_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @variable_memcpy_caller0(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @variable_memcpy_caller0( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 -; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label 
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
ret void
}

-define amdgpu_kernel void @variable_memcpy_caller1(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @variable_memcpy_caller1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 {
; OPT-LABEL: @variable_memcpy_caller1(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n, i64 %m) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n, i64 %m) #0 {
; OPT-LABEL: @memcpy_multi_use_one_function(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
-; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
-; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST0]], i64 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
-; OPT: post-loop-memcpy-expansion:
-; OPT-NEXT: [[TMP16:%.*]] = udiv i64 [[M:%.*]], 16
-; OPT-NEXT: [[TMP17:%.*]] = urem i64 [[M]], 16
-; OPT-NEXT: [[TMP18:%.*]] = sub i64 [[M]], [[TMP17]]
-; OPT-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP16]], 0
-; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_EXPANSION2:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST0:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION2:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5:%.*]]
; OPT: loop-memcpy-expansion2:
-; OPT-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP23:%.*]], [[LOOP_MEMCPY_EXPANSION2]] ]
-; OPT-NEXT: [[TMP20:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX3]]
-; OPT-NEXT: [[TMP21:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP20]], align 1
-; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX3]]
-; OPT-NEXT: store <4 x i32> [[TMP21]], ptr addrspace(1) [[TMP22]], align 1
-; OPT-NEXT: [[TMP23]] = add i64 [[LOOP_INDEX3]], 1
-; OPT-NEXT: [[TMP24:%.*]] = icmp ult i64 [[TMP23]], [[TMP16]]
-; OPT-NEXT: br i1 [[TMP24]], label [[LOOP_MEMCPY_EXPANSION2]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5]]
+; OPT-NEXT: [[LOOP_INDEX3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION2]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX3]]
+; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX3]]
+; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX3]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION2]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5]]
; OPT: loop-memcpy-residual4:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX6:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER5]] ], [ [[TMP29:%.*]], [[LOOP_MEMCPY_RESIDUAL4:%.*]] ]
-; OPT-NEXT: [[TMP25:%.*]] = add i64 [[TMP18]], [[RESIDUAL_LOOP_INDEX6]]
-; OPT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP25]]
-; OPT-NEXT: [[TMP27:%.*]] = load i8, ptr addrspace(1) [[TMP26]], align 1
-; OPT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 [[TMP25]]
-; OPT-NEXT: store i8 [[TMP27]], ptr addrspace(1) [[TMP28]], align 1
-; OPT-NEXT: [[TMP29]] = add i64 [[RESIDUAL_LOOP_INDEX6]], 1
-; OPT-NEXT: [[TMP30:%.*]] = icmp ult i64 [[TMP29]], [[TMP17]]
-; OPT-NEXT: br i1 [[TMP30]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX6:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER5]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL4:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX6]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX6]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1:%.*]]
; OPT: post-loop-memcpy-expansion1:
+; OPT-NEXT: [[TMP20:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP21:%.*]] = bitcast i8 addrspace(1)* [[DST1:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP22:%.*]] = udiv i64 [[M:%.*]], 16
+; OPT-NEXT: [[TMP23:%.*]] = urem i64 [[M]], 16
+; OPT-NEXT: [[TMP24:%.*]] = sub i64 [[M]], [[TMP23]]
+; OPT-NEXT: [[TMP25:%.*]] = icmp ne i64 [[TMP22]], 0
+; OPT-NEXT: br i1 [[TMP25]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT: loop-memcpy-expansion:
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION1]] ], [ [[TMP29:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP26:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP20]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP27:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP26]], align 1
+; OPT-NEXT: [[TMP28:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP21]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP27]], <4 x i32> addrspace(1)* [[TMP28]], align 1
+; OPT-NEXT: [[TMP29]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP30:%.*]] = icmp ult i64 [[TMP29]], [[TMP22]]
+; OPT-NEXT: br i1 [[TMP30]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT: loop-memcpy-residual:
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP37:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP20]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP21]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP33:%.*]] = add i64 [[TMP24]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP31]], i64 [[TMP33]]
+; OPT-NEXT: [[TMP35:%.*]] = load i8, i8 addrspace(1)* [[TMP34]], align 1
+; OPT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP32]], i64 [[TMP33]]
+; OPT-NEXT: store i8 [[TMP35]], i8 addrspace(1)* [[TMP36]], align 1
+; OPT-NEXT: [[TMP37]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP38:%.*]] = icmp ult i64 [[TMP37]], [[TMP23]]
+; OPT-NEXT: br i1 [[TMP38]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP31]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP39:%.*]] = icmp ne i64 [[TMP23]], 0
+; OPT-NEXT: br i1 [[TMP39]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
; OPT: loop-memcpy-residual-header5:
-; OPT-NEXT: [[TMP32:%.*]] = icmp ne i64 [[TMP17]], 0
-; OPT-NEXT: br i1 [[TMP32]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1]]
+; OPT-NEXT: [[TMP40:%.*]] = icmp ne i64 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP40]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %m, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %m, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_alt_type(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_alt_type(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_alt_type(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <2 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(1) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(1)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n, i1 false)
ret void
}

; One of the uses in the function should be expanded, the other left alone.
-define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspace(1) %dst0, ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %n) #0 {
+define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(i8 addrspace(1)* %dst0, i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 %n) #0 {
; MAX1024-LABEL: @memcpy_multi_use_one_function_keep_small(
-; MAX1024-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; MAX1024-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; MAX1024-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; MAX1024-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; MAX1024-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; MAX1024-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; MAX1024-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST0:%.*]] to <4 x i32> addrspace(1)*
+; MAX1024-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; MAX1024-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; MAX1024-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; MAX1024-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; MAX1024-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; MAX1024: loop-memcpy-expansion:
-; MAX1024-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; MAX1024-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; MAX1024-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; MAX1024-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
-; MAX1024-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; MAX1024-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; MAX1024-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; MAX1024-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; MAX1024-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; MAX1024-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; MAX1024-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; MAX1024-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; MAX1024-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; MAX1024-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1
+; MAX1024-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; MAX1024-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; MAX1024: loop-memcpy-residual:
-; MAX1024-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; MAX1024-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; MAX1024-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; MAX1024-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; MAX1024-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST0]], i64 [[TMP10]]
-; MAX1024-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; MAX1024-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; MAX1024-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; MAX1024-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; MAX1024-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; MAX1024-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; MAX1024-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; MAX1024-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; MAX1024-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; MAX1024-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; MAX1024-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; MAX1024-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; MAX1024-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; MAX1024-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; MAX1024-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; MAX1024: post-loop-memcpy-expansion:
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST1:%.*]], ptr addrspace(1) [[SRC]], i64 102, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* [[DST1:%.*]], i8 addrspace(1)* [[SRC]], i64 102, i1 false)
; MAX1024-NEXT: ret void
; MAX1024: loop-memcpy-residual-header:
-; MAX1024-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0
-; MAX1024-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; MAX1024-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0
+; MAX1024-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
; ALL-LABEL: @memcpy_multi_use_one_function_keep_small(
-; ALL-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16
-; ALL-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16
-; ALL-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
-; ALL-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
-; ALL-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST0:%.*]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16
+; ALL-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16
+; ALL-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0
+; ALL-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; ALL: loop-memcpy-expansion:
-; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
-; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST0:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1
-; ALL-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1
-; ALL-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
-; ALL-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; ALL-NEXT: [[LOOP_INDEX1:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX1]]
+; ALL-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1
+; ALL-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX1]]
+; ALL-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1
+; ALL-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX1]], 1
+; ALL-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]]
+; ALL-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; ALL: loop-memcpy-residual:
-; ALL-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; ALL-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; ALL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
-; ALL-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1
-; ALL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST0]], i64 [[TMP10]]
-; ALL-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1
-; ALL-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
-; ALL-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
-; ALL-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; ALL-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; ALL-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; ALL-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; ALL-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; ALL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]]
+; ALL-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1
+; ALL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]]
+; ALL-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1
+; ALL-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
+; ALL-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]]
+; ALL-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; ALL: post-loop-memcpy-expansion:
+; ALL-NEXT: [[TMP20:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP21:%.*]] = bitcast i8 addrspace(1)* [[DST1:%.*]] to <4 x i32> addrspace(1)*
; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; ALL: load-store-loop:
-; ALL-NEXT: [[LOOP_INDEX1:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP19:%.*]], [[LOAD_STORE_LOOP]] ]
-; ALL-NEXT: [[TMP16:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX1]]
-; ALL-NEXT: [[TMP17:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP16]], align 1
-; ALL-NEXT: [[TMP18:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX1]]
-; ALL-NEXT: store <4 x i32> [[TMP17]], ptr addrspace(1) [[TMP18]], align 1
-; ALL-NEXT: [[TMP19]] = add i64 [[LOOP_INDEX1]], 1
-; ALL-NEXT: [[TMP20:%.*]] = icmp ult i64 [[TMP19]], 6
-; ALL-NEXT: br i1 [[TMP20]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP25:%.*]], [[LOAD_STORE_LOOP]] ]
+; ALL-NEXT: [[TMP22:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP20]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: [[TMP23:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP22]], align 1
+; ALL-NEXT: [[TMP24:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP21]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: store <4 x i32> [[TMP23]], <4 x i32> addrspace(1)* [[TMP24]], align 1
+; ALL-NEXT: [[TMP25]] = add i64 [[LOOP_INDEX]], 1
+; ALL-NEXT: [[TMP26:%.*]] = icmp ult i64 [[TMP25]], 6
+; ALL-NEXT: br i1 [[TMP26]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; ALL: memcpy-split:
-; ALL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 24
-; ALL-NEXT: [[TMP22:%.*]] = load i32, ptr addrspace(1) [[TMP21]], align 1
-; ALL-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST1]], i64 24
-; ALL-NEXT: store i32 [[TMP22]], ptr addrspace(1) [[TMP23]], align 1
-; ALL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 50
-; ALL-NEXT: [[TMP25:%.*]] = load i16, ptr addrspace(1) [[TMP24]], align 1
-; ALL-NEXT: [[TMP26:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST1]], i64 50
-; ALL-NEXT: store i16 [[TMP25]], ptr addrspace(1) [[TMP26]], align 1
+; ALL-NEXT: [[TMP27:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP20]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP27]], i64 24
+; ALL-NEXT: [[TMP29:%.*]] = load i32, i32 addrspace(1)* [[TMP28]], align 1
+; ALL-NEXT: [[TMP30:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP21]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP30]], i64 24
+; ALL-NEXT: store i32 [[TMP29]], i32 addrspace(1)* [[TMP31]], align 1
+; ALL-NEXT: [[TMP32:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP20]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP33:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP32]], i64 50
+; ALL-NEXT: [[TMP34:%.*]] = load i16, i16 addrspace(1)* [[TMP33]], align 1
+; ALL-NEXT: [[TMP35:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP21]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP36:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP35]], i64 50
+; ALL-NEXT: store i16 [[TMP34]], i16 addrspace(1)* [[TMP36]], align 1
; ALL-NEXT: ret void
; ALL: loop-memcpy-residual-header:
-; ALL-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP2]], 0
-; ALL-NEXT: br i1 [[TMP27]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; ALL-NEXT: [[TMP37:%.*]] = icmp ne i64 [[TMP4]], 0
+; ALL-NEXT: br i1 [[TMP37]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 102, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst0, i8 addrspace(1)* %src, i64 %n, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst1, i8 addrspace(1)* %src, i64 102, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1028(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 256
-; OPT-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 256
-; OPT-NEXT: store i32 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP8]], i64 256
+; OPT-NEXT: [[TMP10:%.*]] = load i32, i32 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP11]], i64 256
+; OPT-NEXT: store i32 [[TMP10]], i32 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1028, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1028, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1025(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1024
-; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1024
-; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1024
+; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1024
+; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1025, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1025, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1026(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 512
-; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 512
-; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP8]], i64 512
+; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP11]], i64 512
+; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1026, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1026, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1032(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1032, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1032, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1034(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 516
-; OPT-NEXT: [[TMP10:%.*]] = load i16, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 516
-; OPT-NEXT: store i16 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP13]], i64 516
+; OPT-NEXT: [[TMP15:%.*]] = load i16, i16 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP16]], i64 516
+; OPT-NEXT: store i16 [[TMP15]], i16 addrspace(1)* [[TMP17]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1034, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1034, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1035(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 516
-; OPT-NEXT: [[TMP10:%.*]] = load i16, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 516
-; OPT-NEXT: store i16 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
-; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1034
-; OPT-NEXT: [[TMP13:%.*]] = load i8, ptr addrspace(1) [[TMP12]], align 2
-; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1034
-; OPT-NEXT: store i8 [[TMP13]], ptr addrspace(1) [[TMP14]], align 2
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP13]], i64 516
+; OPT-NEXT: [[TMP15:%.*]] = load i16, i16 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP16]], i64 516
+; OPT-NEXT: store i16 [[TMP15]], i16 addrspace(1)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP18]], i64 1034
+; OPT-NEXT: [[TMP20:%.*]] = load i8, i8 addrspace(1)* [[TMP19]], align 2
+; OPT-NEXT: [[TMP21:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP21]], i64 1034
+; OPT-NEXT: store i8 [[TMP20]], i8 addrspace(1)* [[TMP22]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1035, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1035, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1036(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 258
-; OPT-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 258
-; OPT-NEXT: store i32 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP13]], i64 258
+; OPT-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP16]], i64 258
+; OPT-NEXT: store i32 [[TMP15]], i32 addrspace(1)* [[TMP17]], align 4
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1036, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1036, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align4_global_align4_1039(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC]], i64 128
-; OPT-NEXT: [[TMP7:%.*]] = load i64, ptr addrspace(1) [[TMP6]], align 4
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST]], i64 128
-; OPT-NEXT: store i64 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4
-; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 258
-; OPT-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP9]], align 4
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 258
-; OPT-NEXT: store i32 [[TMP10]], ptr addrspace(1) [[TMP11]], align 4
-; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 518
-; OPT-NEXT: [[TMP13:%.*]] = load i16, ptr addrspace(1) [[TMP12]], align 4
-; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 518
-; OPT-NEXT: store i16 [[TMP13]], ptr addrspace(1) [[TMP14]], align 4
-; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1038
-; OPT-NEXT: [[TMP16:%.*]] = load i8, ptr addrspace(1) [[TMP15]], align 2
-; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1038
-; OPT-NEXT: store i8 [[TMP16]], ptr addrspace(1) [[TMP17]], align 2
+; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP8]], i64 128
+; OPT-NEXT: [[TMP10:%.*]] = load i64, i64 addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i64 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP11]], i64 128
+; OPT-NEXT: store i64 [[TMP10]], i64 addrspace(1)* [[TMP12]], align 4
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP13]], i64 258
+; OPT-NEXT: [[TMP15:%.*]] = load i32, i32 addrspace(1)* [[TMP14]], align 4
+; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i32 addrspace(1)*
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP16]], i64 258
+; OPT-NEXT: store i32 [[TMP15]], i32 addrspace(1)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP18]], i64 518
+; OPT-NEXT: [[TMP20:%.*]] = load i16, i16 addrspace(1)* [[TMP19]], align 4
+; OPT-NEXT: [[TMP21:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP21]], i64 518
+; OPT-NEXT: store i16 [[TMP20]], i16 addrspace(1)* [[TMP22]], align 4
+; OPT-NEXT: [[TMP23:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP23]], i64 1038
+; OPT-NEXT: [[TMP25:%.*]] = load i8, i8 addrspace(1)* [[TMP24]], align 2
+; OPT-NEXT: [[TMP26:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP26]], i64 1038
+; OPT-NEXT: store i8 [[TMP25]], i8 addrspace(1)* [[TMP27]], align 2
; OPT-NEXT: ret void
;
- call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1039, i1 false)
+ call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1039, i1 false)
ret void
}

-define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align2_global_align2_1039(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; OPT-LABEL: @memcpy_global_align2_global_align2_1039(
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)*
; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; OPT: load-store-loop:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 2
-; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 2
-; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 519
-; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(1)* [[TMP3]], align 2
+; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(1)* [[TMP5]], align 2
+; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 519
+; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; OPT: memcpy-split:
-; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1038
-; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 2
-; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1038
-; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2
+; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1038
+; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 2
+; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)*
[[TMP11]], i64 1038 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 1039, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %dst, i8 addrspace(1)* align 2 %src, i64 1039, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @memcpy_global_align4_global_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(1) [[TMP6]], align 4 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(1) [[TMP8]], align 4 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(1) [[TMP9]], align 2 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(1) [[TMP11]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP8]], i64 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(1)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP11]], i64 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(1)* [[TMP12]], align 4 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] 
= getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(1)* [[TMP14]], align 2 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP16]], i64 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(1)* [[TMP17]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @memcpy_global_align2_global_align4_1027(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @memcpy_global_align2_global_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(1)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(1)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 4 %src, i64 1027, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %dst, i8 addrspace(1)* align 4 
%src, i64 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 { +define amdgpu_kernel void @memcpy_global_align4_global_align2_1027(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 { ; OPT-LABEL: @memcpy_global_align4_global_align2_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(1)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(1)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(1) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP8]], i64 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(1)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP11]], i64 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(1)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 2 %src, i64 1027, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 2 %src, i64 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align4_private_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to <4 x i32> addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to <4 x i32> addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 
0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(5)* [[TMP3]], align 4 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(5)* [[TMP5]], align 4 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC]], i32 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(5) [[TMP6]], align 4 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST]], i32 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(5) [[TMP8]], align 4 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(5) [[TMP9]], align 2 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP8]], i32 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(5)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP11]], i32 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(5)* [[TMP12]], align 4 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP13]], i32 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(5)* [[TMP14]], align 2 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP16]], i32 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(5)* [[TMP17]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 4 %dst, i8 addrspace(5)* align 4 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align2_private_align4_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: 
@memcpy_private_align2_private_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to i16 addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(5) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(5)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(5)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(5) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP8]], i32 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(5)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP11]], i32 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(5)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 2 %dst, i8 addrspace(5)* align 4 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align1_private_align4_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to <4 x i32> addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to <4 x i32> addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4 -; OPT-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds <4 x i32>, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(5)* [[TMP3]], align 4 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(5)* [[TMP5]], align 1 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC]], i32 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(5) [[TMP6]], align 4 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST]], i32 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(5) [[TMP8]], align 1 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(5) [[TMP9]], align 2 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 1 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP8]], i32 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(5)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP11]], i32 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(5)* [[TMP12]], align 1 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP13]], i32 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(5)* [[TMP14]], align 2 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP16]], i32 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(5)* [[TMP17]], align 1 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 4 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 1 %dst, i8 addrspace(5)* align 4 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align4_private_align2_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align4_private_align2_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to i16 addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: 
[[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(5) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(5)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(5)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(5) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP8]], i32 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(5)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP11]], i32 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(5)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 4 %dst, i8 addrspace(5)* align 2 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align4_private_align1_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to <4 x i32> addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to <4 x i32> addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 1 -; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 64 -; OPT-NEXT: br i1 [[TMP5]], label 
[[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(5)* [[TMP3]], align 1 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(5)* [[TMP5]], align 4 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 64 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC]], i32 512 -; OPT-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(5) [[TMP6]], align 1 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[DST]], i32 512 -; OPT-NEXT: store i16 [[TMP7]], ptr addrspace(5) [[TMP8]], align 4 -; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP10:%.*]] = load i8, ptr addrspace(5) [[TMP9]], align 1 -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP10]], ptr addrspace(5) [[TMP11]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP8]], i32 512 +; OPT-NEXT: [[TMP10:%.*]] = load i16, i16 addrspace(5)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP11]], i32 512 +; OPT-NEXT: store i16 [[TMP10]], i16 addrspace(5)* [[TMP12]], align 4 +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP13]], i32 1026 +; OPT-NEXT: [[TMP15:%.*]] = load i8, i8 addrspace(5)* [[TMP14]], align 1 +; OPT-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP16]], i32 1026 +; OPT-NEXT: store i8 [[TMP15]], i8 addrspace(5)* [[TMP17]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 4 %dst, ptr addrspace(5) align 1 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 4 %dst, i8 addrspace(5)* align 1 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(ptr addrspace(5) %dst, ptr addrspace(5) %src) #0 { +define amdgpu_kernel void @memcpy_private_align2_private_align2_1027(i8 addrspace(5)* %dst, i8 addrspace(5)* %src) #0 { ; OPT-LABEL: @memcpy_private_align2_private_align2_1027( +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(5)* [[SRC:%.*]] to i16 addrspace(5)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(5)* [[DST:%.*]] to i16 addrspace(5)* ; OPT-NEXT: br label [[LOAD_STORE_LOOP:%.*]] ; OPT: load-store-loop: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ] -; OPT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[TMP1]], align 2 -; OPT-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds i16, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP2]], ptr addrspace(5) [[TMP3]], align 2 -; OPT-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 513 -; OPT-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ] +; OPT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP1]], i32 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP4:%.*]] = load i16, i16 addrspace(5)* [[TMP3]], align 2 +; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(5)* [[TMP2]], i32 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP4]], i16 addrspace(5)* [[TMP5]], align 2 +; OPT-NEXT: [[TMP6]] = add i32 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 513 +; OPT-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]] ; OPT: memcpy-split: -; OPT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 1026 -; OPT-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(5) [[TMP6]], align 2 -; OPT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 1026 -; OPT-NEXT: store i8 [[TMP7]], ptr addrspace(5) [[TMP8]], align 2 +; OPT-NEXT: [[TMP8:%.*]] = bitcast i16 addrspace(5)* [[TMP1]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP8]], i32 1026 +; OPT-NEXT: [[TMP10:%.*]] = load i8, i8 addrspace(5)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP11:%.*]] = bitcast i16 addrspace(5)* [[TMP2]] to i8 addrspace(5)* +; OPT-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP11]], i32 1026 +; OPT-NEXT: store i8 [[TMP10]], i8 addrspace(5)* [[TMP12]], align 2 ; OPT-NEXT: ret void ; - call void @llvm.memcpy.p5i8.p5i8.i32(ptr addrspace(5) align 2 %dst, ptr addrspace(5) align 2 %src, i32 1027, i1 false) + call void @llvm.memcpy.p5i8.p5i8.i32(i8 addrspace(5)* align 2 %dst, i8 addrspace(5)* align 2 %src, i32 1027, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @memcpy_global_align4_global_align4_variable(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @memcpy_global_align4_global_align4_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 4 -; OPT-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 4 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 4 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 4 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 4 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 4 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] +; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] +; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]] +; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]] +; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 4 +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]] +; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 4 +; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]] +; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] ; OPT: post-loop-memcpy-expansion: ; OPT-NEXT: ret void ; OPT: loop-memcpy-residual-header: -; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0 -; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] +; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0 +; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 %n, i1 false) ret void } -define amdgpu_kernel void 
@memcpy_global_align2_global_align2_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @memcpy_global_align2_global_align2_variable(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @memcpy_global_align2_global_align2_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 2 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 2 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 2 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 2 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(1) [[TMP5]], align 2 -; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store i16 [[TMP6]], ptr addrspace(1) [[TMP7]], align 2 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load i16, i16 addrspace(1)* [[TMP7]], align 2 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]] +; OPT-NEXT: store i16 [[TMP8]], i16 addrspace(1)* [[TMP9]], align 2 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 2 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 2 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] +; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] +; OPT-NEXT: [[TMP12:%.*]] = bitcast i16 addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP13:%.*]] = bitcast i16 addrspace(1)* [[TMP2]] to i8 
addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]] +; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]] +; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 2 +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]] +; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 2 +; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]] +; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] ; OPT: post-loop-memcpy-expansion: ; OPT-NEXT: ret void ; OPT: loop-memcpy-residual-header: -; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0 -; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] +; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0 +; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 2 %dst, ptr addrspace(1) align 2 %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 2 %dst, i8 addrspace(1)* align 2 %src, i64 %n, i1 false) ret void } -define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 %n) #0 { +define amdgpu_kernel void @memcpy_global_align1_global_align1_variable(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 %n) #0 { ; OPT-LABEL: @memcpy_global_align1_global_align1_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i64 [[N:%.*]], 16 -; OPT-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 16 -; OPT-NEXT: [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i64 [[N:%.*]], 16 +; OPT-NEXT: [[TMP4:%.*]] = urem i64 [[N]], 16 +; OPT-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[TMP4]] +; OPT-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP3]], 0 +; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] ; OPT: loop-memcpy-expansion: -; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] -; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1 -; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]] -; OPT-NEXT: store <4 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 1 -; OPT-NEXT: [[TMP8]] = add i64 [[LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]] -; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] +; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ] +; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]] +; OPT-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP7]], align 1 +; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 
[[LOOP_INDEX]] +; OPT-NEXT: store <4 x i32> [[TMP8]], <4 x i32> addrspace(1)* [[TMP9]], align 1 +; OPT-NEXT: [[TMP10]] = add i64 [[LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP11:%.*]] = icmp ult i64 [[TMP10]], [[TMP3]] +; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]] ; OPT: loop-memcpy-residual: -; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] -; OPT-NEXT: [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX]] -; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]] -; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 1 -; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[TMP10]] -; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 1 -; OPT-NEXT: [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 -; OPT-NEXT: [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]] -; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] +; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ] +; OPT-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)* +; OPT-NEXT: [[TMP14:%.*]] = add i64 [[TMP5]], [[RESIDUAL_LOOP_INDEX]] +; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i64 [[TMP14]] +; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 1 +; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i64 [[TMP14]] +; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 1 +; OPT-NEXT: [[TMP18]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1 +; OPT-NEXT: [[TMP19:%.*]] = icmp ult i64 [[TMP18]], [[TMP4]] +; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]] ; OPT: post-loop-memcpy-expansion: ; OPT-NEXT: ret void ; OPT: loop-memcpy-residual-header: -; OPT-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP2]], 0 -; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] +; OPT-NEXT: [[TMP20:%.*]] = icmp ne i64 [[TMP4]], 0 +; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]] ; - call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 %src, i64 %n, i1 false) + call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 1 %dst, i8 addrspace(1)* align 1 %src, i64 %n, i1 false) ret void } -define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 { +define amdgpu_kernel void @memcpy_local_align4_local_align4_variable(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %n) #0 { ; OPT-LABEL: @memcpy_local_align4_local_align4_variable( -; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8 -; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8 -; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]] -; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0 -; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]] +; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)* +; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to <2 x i32> addrspace(3)* +; OPT-NEXT: [[TMP3:%.*]] = udiv i32 
[[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 4
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(3) [[TMP7]], align 4
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 4
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(3)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 4
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 4
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 4
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 4 %dst, i8 addrspace(3)* align 4 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align2_local_align2_variable(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_local_align2_local_align2_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 2
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 2
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to i16 addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to i16 addrspace(3)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 2
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 2
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load i16, ptr addrspace(3) [[TMP5]], align 2
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store i16 [[TMP6]], ptr addrspace(3) [[TMP7]], align 2
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16 addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load i16, i16 addrspace(3)* [[TMP7]], align 2
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16 addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store i16 [[TMP8]], i16 addrspace(3)* [[TMP9]], align 2
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 2
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 2
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast i16 addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast i16 addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 2
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 2
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 2 %dst, ptr addrspace(3) align 2 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 2 %dst, i8 addrspace(3)* align 2 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(ptr addrspace(3) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align1_local_align1_variable(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_local_align1_local_align1_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 1
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(3) [[TMP7]], align 1
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 1
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(3)* [[TMP9]], align 1
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 1
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 1
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 1
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 1
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p3i8.i32(ptr addrspace(3) align 1 %dst, ptr addrspace(3) align 1 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* align 1 %dst, i8 addrspace(3)* align 1 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_local_align4_global_align4_variable(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_local_align4_global_align4_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <2 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(3)* [[DST:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(1) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP5]], align 4
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(3) [[TMP7]], align 4
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(1)* [[TMP7]], align 4
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(3)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(1) [[TMP11]], align 4
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(3) [[TMP13]], align 4
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(1)* [[TMP1]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP2]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(1)* [[TMP15]], align 4
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(3)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p3i8.p1i8.i32(ptr addrspace(3) align 4 %dst, ptr addrspace(1) align 4 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* align 4 %dst, i8 addrspace(1)* align 4 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 %n) #0 {
+define amdgpu_kernel void @memcpy_global_align4_local_align4_variable(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 %n) #0 {
; OPT-LABEL: @memcpy_global_align4_local_align4_variable(
-; OPT-NEXT: [[TMP1:%.*]] = udiv i32 [[N:%.*]], 8
-; OPT-NEXT: [[TMP2:%.*]] = urem i32 [[N]], 8
-; OPT-NEXT: [[TMP3:%.*]] = sub i32 [[N]], [[TMP2]]
-; OPT-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], 0
-; OPT-NEXT: br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
+; OPT-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(3)* [[SRC:%.*]] to <2 x i32> addrspace(3)*
+; OPT-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <2 x i32> addrspace(1)*
+; OPT-NEXT: [[TMP3:%.*]] = udiv i32 [[N:%.*]], 8
+; OPT-NEXT: [[TMP4:%.*]] = urem i32 [[N]], 8
+; OPT-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[TMP4]]
+; OPT-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP3]], 0
+; OPT-NEXT: br i1 [[TMP6]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
; OPT: loop-memcpy-expansion:
-; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
-; OPT-NEXT: [[TMP5:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(3) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: [[TMP6:%.*]] = load <2 x i32>, ptr addrspace(3) [[TMP5]], align 4
-; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, ptr addrspace(1) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 4
-; OPT-NEXT: [[TMP8]] = add i32 [[LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], [[TMP1]]
-; OPT-NEXT: br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
+; OPT-NEXT: [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP10:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
+; OPT-NEXT: [[TMP7:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(3)* [[TMP1]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: [[TMP8:%.*]] = load <2 x i32>, <2 x i32> addrspace(3)* [[TMP7]], align 4
+; OPT-NEXT: [[TMP9:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* [[TMP2]], i32 [[LOOP_INDEX]]
+; OPT-NEXT: store <2 x i32> [[TMP8]], <2 x i32> addrspace(1)* [[TMP9]], align 4
+; OPT-NEXT: [[TMP10]] = add i32 [[LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP3]]
+; OPT-NEXT: br i1 [[TMP11]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
; OPT: loop-memcpy-residual:
-; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
-; OPT-NEXT: [[TMP10:%.*]] = add i32 [[TMP3]], [[RESIDUAL_LOOP_INDEX]]
-; OPT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[SRC]], i32 [[TMP10]]
-; OPT-NEXT: [[TMP12:%.*]] = load i8, ptr addrspace(3) [[TMP11]], align 4
-; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i32 [[TMP10]]
-; OPT-NEXT: store i8 [[TMP12]], ptr addrspace(1) [[TMP13]], align 4
-; OPT-NEXT: [[TMP14]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
-; OPT-NEXT: [[TMP15:%.*]] = icmp ult i32 [[TMP14]], [[TMP2]]
-; OPT-NEXT: br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
+; OPT-NEXT: [[RESIDUAL_LOOP_INDEX:%.*]] = phi i32 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP18:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
+; OPT-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> addrspace(3)* [[TMP1]] to i8 addrspace(3)*
+; OPT-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> addrspace(1)* [[TMP2]] to i8 addrspace(1)*
+; OPT-NEXT: [[TMP14:%.*]] = add i32 [[TMP5]], [[RESIDUAL_LOOP_INDEX]]
+; OPT-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[TMP12]], i32 [[TMP14]]
+; OPT-NEXT: [[TMP16:%.*]] = load i8, i8 addrspace(3)* [[TMP15]], align 4
+; OPT-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP13]], i32 [[TMP14]]
+; OPT-NEXT: store i8 [[TMP16]], i8 addrspace(1)* [[TMP17]], align 4
+; OPT-NEXT: [[TMP18]] = add i32 [[RESIDUAL_LOOP_INDEX]], 1
+; OPT-NEXT: [[TMP19:%.*]] = icmp ult i32 [[TMP18]], [[TMP4]]
+; OPT-NEXT: br i1 [[TMP19]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
; OPT: post-loop-memcpy-expansion:
; OPT-NEXT: ret void
; OPT: loop-memcpy-residual-header:
-; OPT-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP2]], 0
-; OPT-NEXT: br i1 [[TMP16]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP4]], 0
+; OPT-NEXT: br i1 [[TMP20]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
;
-  call void @llvm.memcpy.p1i8.p3i8.i32(ptr addrspace(1) align 4 %dst, ptr addrspace(3) align 4 %src, i32 %n, i1 false)
+  call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %dst, i8 addrspace(3)* align 4 %src, i32 %n, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_16(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_16(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 16, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 16, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_16(
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to <4 x i32> addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to <4 x i32> addrspace(1)*
; ALL-NEXT: br label [[LOAD_STORE_LOOP:%.*]]
; ALL: load-store-loop:
-; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT: store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; ALL-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 1
-; ALL-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1
-; ALL-NEXT: br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
+; ALL-NEXT: [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], [[LOAD_STORE_LOOP]] ]
+; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP1]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP3]], align 4
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[TMP2]], i64 [[LOOP_INDEX]]
+; ALL-NEXT: store <4 x i32> [[TMP4]], <4 x i32> addrspace(1)* [[TMP5]], align 4
+; ALL-NEXT: [[TMP6]] = add i64 [[LOOP_INDEX]], 1
+; ALL-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 1
+; ALL-NEXT: br i1 [[TMP7]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
; ALL: memcpy-split:
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 16, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 16, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_12(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_12(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_12(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 12, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 12, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_12(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC]], i64 2
-; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(1) [[TMP4]], align 4
-; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST]], i64 2
-; ALL-NEXT: store i32 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i64 [[TMP3]], i64 addrspace(1)* [[TMP5]], align 4
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP6]], i64 2
+; ALL-NEXT: [[TMP8:%.*]] = load i32, i32 addrspace(1)* [[TMP7]], align 4
+; ALL-NEXT: [[TMP9:%.*]] = bitcast i8 addrspace(1)* [[DST]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP9]], i64 2
+; ALL-NEXT: store i32 [[TMP8]], i32 addrspace(1)* [[TMP10]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 12, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 12, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_8(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_8(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 8, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 8, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_8(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i64 [[TMP3]], i64 addrspace(1)* [[TMP5]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 8, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 8, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_10(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_10(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_10(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 10, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 10, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_10(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i64 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; ALL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC]], i64 4
-; ALL-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(1) [[TMP4]], align 4
-; ALL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST]], i64 4
-; ALL-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[TMP6]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i64, i64 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i64 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i64 [[TMP3]], i64 addrspace(1)* [[TMP5]], align 4
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8 addrspace(1)* [[SRC]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP6]], i64 4
+; ALL-NEXT: [[TMP8:%.*]] = load i16, i16 addrspace(1)* [[TMP7]], align 4
+; ALL-NEXT: [[TMP9:%.*]] = bitcast i8 addrspace(1)* [[DST]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP9]], i64 4
+; ALL-NEXT: store i16 [[TMP8]], i16 addrspace(1)* [[TMP10]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 10, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 10, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_4(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_4(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_4(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 4, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 4, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_4(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i32, i32 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i32 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i32 [[TMP3]], i32 addrspace(1)* [[TMP5]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 4, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 4, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_2(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_2(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_2(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 2, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 2, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_2(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i16 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = bitcast i8 addrspace(1)* [[SRC:%.*]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP1]], i64 0
+; ALL-NEXT: [[TMP3:%.*]] = load i16, i16 addrspace(1)* [[TMP2]], align 4
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8 addrspace(1)* [[DST:%.*]] to i16 addrspace(1)*
+; ALL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16 addrspace(1)* [[TMP4]], i64 0
+; ALL-NEXT: store i16 [[TMP3]], i16 addrspace(1)* [[TMP5]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 2, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 2, i1 false)
  ret void
}

-define amdgpu_kernel void @memcpy_global_align4_global_align4_1(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+define amdgpu_kernel void @memcpy_global_align4_global_align4_1(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) #0 {
; MAX1024-LABEL: @memcpy_global_align4_global_align4_1(
-; MAX1024-NEXT: call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 [[DST:%.*]], ptr addrspace(1) align 4 [[SRC:%.*]], i64 1, i1 false)
+; MAX1024-NEXT: call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 [[DST:%.*]], i8 addrspace(1)* align 4 [[SRC:%.*]], i64 1, i1 false)
; MAX1024-NEXT: ret void
;
; ALL-LABEL: @memcpy_global_align4_global_align4_1(
-; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 0
-; ALL-NEXT: [[TMP2:%.*]] = load i8, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 0
-; ALL-NEXT: store i8 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; ALL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[SRC:%.*]], i64 0
+; ALL-NEXT: [[TMP2:%.*]] = load i8, i8 addrspace(1)* [[TMP1]], align 4
+; ALL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[DST:%.*]], i64 0
+; ALL-NEXT: store i8 [[TMP2]], i8 addrspace(1)* [[TMP3]], align 4
; ALL-NEXT: ret void
;
-  call void @llvm.memcpy.p1i8.p1i8.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 1, i1 false)
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* align 4 %dst, i8 addrspace(1)* align 4 %src, i64 1, i1 false)
  ret void
}
Index: mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
===================================================================
--- mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
+++ mlir/lib/Dialect/Linalg/Transforms/TilingInterfaceImpl.cpp
@@ -276,6 +276,9 @@
   SmallVector<int64_t> newOutputShape;
   ArrayRef<int64_t> oldShape =
       linalgOp.getShape(linalgOp.getDpsInitOperand(0));
+  assert(sizes.size() == oldShape.size() + 1 &&
+         "result tensor should have rank exactly one dimension smaller than "
+         "the number of loops.");
   SmallVector<Value> dynamicDims;
   for (int64_t idx : llvm::seq<int64_t>(0, oldShape.size() + 1)) {
     if (idx == insertSplitDimension) {
Index: mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
===================================================================
--- mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -453,6 +453,19 @@
      break;
    }
  }
+  {
+    auto origResultTensor = cast<DestinationStyleOpInterface>(op.getOperation())
+                                .getDpsInitOperand(0);
+    size_t origResultSize = 0;
+    if (auto shapedType =
+            origResultTensor->get().getType().dyn_cast<ShapedType>())
+      origResultSize = shapedType.getShape().size();
+    if (iterationDomain.size() != origResultSize + 1) {
+      return b.notifyMatchFailure(
+          op, "only support result tensor whose rank is exactly one dimension "
+              "smaller than the number of loops.");
+    }
+  }
  // 1. create the inital tensor value.
  FailureOr<Operation *> identityTensor =
      op.generateInitialTensorForPartialReduction(b, loc, tileSize,
Index: mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
===================================================================
--- mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
+++ mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
@@ -273,3 +273,4 @@
  %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
  %1:4 = transform.structured.split_reduction %0 { split_factor = 4, insert_split_dimension = 2, inner_parallel}
}
+
Index: mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
===================================================================
--- mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
+++ mlir/test/Dialect/Linalg/transform-tile-reduction.mlir
@@ -197,3 +197,36 @@
// CHECK: linalg.yield
// CHECK: } -> tensor
// CHECK: return %[[R]] : tensor
+
+// -----
+
+func.func @reduction_bug(%arg0: tensor<32x32xi32>, %arg1: tensor<32x32xi32>, %out: tensor<32xi32>) -> tensor<32xi32> {
+  %red = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>,
+                                          affine_map<(d0, d1, d2) -> (d0, d1)>,
+                                          affine_map<(d0, d1, d2) -> (d0)>],
+  iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<32x32xi32>, tensor<32x32xi32>) outs(%out : tensor<32xi32>) {
+  ^bb0(%a: i32, %b: i32, %c: i32):
+    %r1 = arith.muli %a, %b : i32
+    %r2 = arith.addi %c, %r1 : i32
+    linalg.yield %r2 : i32
+  } -> tensor<32xi32>
+  return %red : tensor<32xi32>
+}
+
+transform.sequence failures(suppress) {
+^bb0(%arg1: !pdl.operation):
+  %0 = transform.structured.match ops{["linalg.generic"]} in %arg1
+  %1, %2, %3 = transform.structured.tile_reduction_using_scf %0 { tile_sizes = [0, 0, 8] }
+}
+
+// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0)>
+// CHECK-LABEL: func @reduction_bug
+// CHECK: %[[RED:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "reduction"]}
+// CHECK-SAME: ins(%[[I1:.*]], %[[I2:.*]] : tensor<32x32xi32>, tensor<32x32xi32>) outs(%[[F:.*]] : tensor<32xi32>) {
+// CHECK: arith.muli
+// CHECK: arith.addi
+// CHECK: linalg.yield
+// CHECK: } -> tensor<32xi32>
+// CHECK: return %[[RED]] : tensor<32xi32>