diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2541,7 +2541,7 @@ let TargetPrefix = "aarch64" in {
   class SME_Load_Store_Intrinsic<LLVMType pred_ty>
       : DefaultAttrsIntrinsic<[],
-          [pred_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>;
+          [pred_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
 
   // Loads
   def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic<llvm_nxv16i1_ty>;
@@ -2575,11 +2575,12 @@
   class SME_TileToVector_Intrinsic
       : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-          [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i64_ty, llvm_i32_ty]>;
+          [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<2>>]>;
   class SME_VectorToTile_Intrinsic
       : DefaultAttrsIntrinsic<[],
-          [llvm_i64_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_anyvector_ty]>;
+          [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic;
   def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic;
@@ -2591,15 +2592,15 @@
   def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
   def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
 
-  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>;
+  def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
 
   class SME_OuterProduct_Intrinsic
       : DefaultAttrsIntrinsic<[],
-          [llvm_i64_ty,
+          [llvm_i32_ty,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMMatchType<0>,
-           llvm_anyvector_ty]>;
+           llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
   def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
@@ -2618,10 +2619,10 @@
   class SME_AddVectorToTile_Intrinsic
       : DefaultAttrsIntrinsic<[],
-          [llvm_i64_ty,
+          [llvm_i32_ty,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
-           llvm_anyvector_ty]>;
+           llvm_anyvector_ty], [ImmArg<ArgIndex<0>>]>;
 
   def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
   def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -941,6 +941,13 @@
   let ParserMatchClass = Imm0_1Operand;
 }
 
+// timm32_0_1 predicate - True if the 32-bit immediate is in the range [0,1]
+def timm32_0_1 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 2;
+}]> {
+  let ParserMatchClass = Imm0_1Operand;
+}
+
 // imm0_15 predicate - True if the immediate is in the range [0,15]
 def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
   return ((uint64_t)Imm) < 16;
 }]> {
   let ParserMatchClass = Imm0_15Operand;
 }
 
+// timm32_0_3 predicate - True if the 32-bit immediate is in the range [0,3]
+def timm32_0_3 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 4;
+}]> {
+  let ParserMatchClass = Imm0_3Operand;
+}
+
 // timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
 def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
   return ((uint32_t)Imm) < 8;
 }]> {
   let ParserMatchClass = Imm0_7Operand;
 }
 
+// timm32_0_15 predicate - True if the 32-bit immediate is in the range [0,15]
+def timm32_0_15 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 16;
+}]> {
+  let ParserMatchClass = Imm0_15Operand;
+}
+
+// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31]
+def timm32_0_31 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 32;
+}]> {
+  let ParserMatchClass = Imm0_31Operand;
+}
+
+// timm32_0_255 predicate - True if the 32-bit immediate is in the range [0,255]
+def timm32_0_255 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) < 256;
+}]> {
+  let ParserMatchClass = Imm0_255Operand;
+}
+
 // An arithmetic shifter operand:
 //  {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
 //  {5-0} - imm6
@@ -1340,34 +1375,34 @@ : VectorIndex;
-def sme_elm_idx0_0 : Operand<i64>, ImmLeaf<i64, [{
-  return ((uint64_t)Imm) == 0;
-}]> {
+def sme_elm_idx0_0 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) == 0;
+}]> {
   let ParserMatchClass = Imm0_0Operand;
   let PrintMethod = "printMatrixIndex";
   let OperandNamespace = "AArch64";
   let OperandType = "OPERAND_IMPLICIT_IMM_0";
 }
-def sme_elm_idx0_1 : Operand<i64>, ImmLeaf<i64, [{
-  return ((uint64_t)Imm) <= 1;
-}]> {
+def sme_elm_idx0_1 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) <= 1;
+}]> {
   let ParserMatchClass = Imm0_1Operand;
   let PrintMethod = "printMatrixIndex";
 }
-def sme_elm_idx0_3 : Operand<i64>, ImmLeaf<i64, [{
-  return ((uint64_t)Imm) <= 3;
-}]> {
+def sme_elm_idx0_3 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) <= 3;
+}]> {
   let ParserMatchClass = Imm0_3Operand;
   let PrintMethod = "printMatrixIndex";
 }
-def sme_elm_idx0_7 : Operand<i64>, ImmLeaf<i64, [{
-  return ((uint64_t)Imm) <= 7;
-}]> {
+def sme_elm_idx0_7 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) <= 7;
+}]> {
   let ParserMatchClass = Imm0_7Operand;
   let PrintMethod = "printMatrixIndex";
 }
-def sme_elm_idx0_15 : Operand<i64>, ImmLeaf<i64, [{
-  return ((uint64_t)Imm) <= 15;
-}]> {
+def sme_elm_idx0_15 : Operand<i32>, TImmLeaf<i32, [{
+  return ((uint32_t)Imm) <= 15;
+}]> {
   let ParserMatchClass = Imm0_15Operand;
   let PrintMethod = "printMatrixIndex";
 }
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -10,11 +10,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-def imm_to_tile8   : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAB0>", []>;
-def imm_to_tile16  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAH0>", []>;
-def imm_to_tile32  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAS0>", []>;
-def imm_to_tile64  : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAD0>", []>;
-def imm_to_tile128 : ComplexPattern<i64, 1, "ImmToTile<AArch64::ZAQ0>", []>;
+def imm_to_tile8   : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAB0>", []>;
+def imm_to_tile16  : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAH0>", []>;
+def imm_to_tile32  : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAS0>", []>;
+def imm_to_tile64  : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAD0>", []>;
+def imm_to_tile128 : ComplexPattern<i32, 1, "ImmToTile<AArch64::ZAQ0>", []>;
 
 def tileslice8   : ComplexPattern<i32, 2, "SelectSMETileSlice<4>", []>;
 def tileslice16  : ComplexPattern<i32, 2, "SelectSMETileSlice<3>", []>;
@@ -29,7 +29,7 @@
 //===----------------------------------------------------------------------===//
 
 class sme_outer_product_pseudo<ZPRRegOp zpr_ty>
-    : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
+    : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
                       zpr_ty:$zn, zpr_ty:$zm), []>,
       Sched<[]> {
   // Translated to the actual instructions in AArch64ISelLowering.cpp
@@ -69,9 +69,9 @@
   def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32>;
 
-  def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+  def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
                 (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
-            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
@@ -82,9 +82,9 @@
   def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64>;
 
-  def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+  def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
                 (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
-            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
+            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 class sme_int_outer_product_inst<bit u0, bit u1, bit S, MatrixTileOperand za_ty,
                                  ZPRRegOp zpr_ty, string mnemonic>
 
-  def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
+  def : Pat<(op timm32_0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
                 (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
-            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
                                      SDPatternOperator op> {
@@ -140,9 +140,9 @@
   def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
 
-  def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+  def : Pat<(op timm32_0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
                 (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
-            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
+            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
@@ -174,9 +174,9 @@
   def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
 
-  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+  def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
                 (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
-            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
@@ -184,9 +184,9 @@
   def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
 
-  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+  def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
                 (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
-            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
+            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -229,7 +229,7 @@
 class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
     : Pseudo<(outs),
-             (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
+             (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
       Sched<[]> {
   // Translated to the actual instructions in AArch64ISelLowering.cpp
   let usesCustomInserter = 1;
@@ -239,26 +239,26 @@
 def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
 
 def : Pat<(int_aarch64_sme_addha
-            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+            timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
             (nxv4i32 ZPR32:$zn)),
-          (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+          (ADDHA_MPPZ_PSEUDO_S timm32_0_3:$tile, $pn, $pm, $zn)>;
 def : Pat<(int_aarch64_sme_addva
-            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+            timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
             (nxv4i32 ZPR32:$zn)),
-          (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+          (ADDVA_MPPZ_PSEUDO_S timm32_0_3:$tile, $pn, $pm, $zn)>;
 
 let Predicates = [HasSMEI16I64] in {
 def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
 def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
 
 def : Pat<(int_aarch64_sme_addha
-            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+            timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
             (nxv2i64 ZPR64:$zn)),
-          (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+          (ADDHA_MPPZ_PSEUDO_D timm32_0_7:$tile, $pn, $pm, $zn)>;
 def : Pat<(int_aarch64_sme_addva
-            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+            timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
             (nxv2i64 ZPR64:$zn)),
-          (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+          (ADDVA_MPPZ_PSEUDO_D timm32_0_7:$tile, $pn, $pm, $zn)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -350,8 +350,8 @@
 }
 
 class sme_load_pseudo
-    : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
-                      i64imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
+    : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
+                      i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>,
       Sched<[]> {
   // Translated to the actual instructions in AArch64ISelLowering.cpp
   let usesCustomInserter = 1;
@@ -409,27 +409,27 @@
   defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
                                 !if(is_col, int_aarch64_sme_ld1b_vert,
                                             int_aarch64_sme_ld1b_horiz),
-                                sme_elm_idx0_0, imm0_15, am_sve_regreg_lsl0,
+                                sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0,
                                 tileslice8>;
   defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
                                 !if(is_col, int_aarch64_sme_ld1h_vert,
                                             int_aarch64_sme_ld1h_horiz),
-                                imm0_1, imm0_7, am_sve_regreg_lsl1,
+                                timm32_0_1, timm32_0_7, am_sve_regreg_lsl1,
                                 tileslice16>;
   defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
                                 !if(is_col, int_aarch64_sme_ld1w_vert,
                                             int_aarch64_sme_ld1w_horiz),
-                                imm0_3, imm0_3, am_sve_regreg_lsl2,
+                                timm32_0_3, timm32_0_3, am_sve_regreg_lsl2,
                                 tileslice32>;
   defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
                                 !if(is_col, int_aarch64_sme_ld1d_vert,
                                             int_aarch64_sme_ld1d_horiz),
-                                imm0_7, imm0_1, am_sve_regreg_lsl3,
+                                timm32_0_7, timm32_0_1, am_sve_regreg_lsl3,
                                 tileslice64>;
   defm : sme_mem_ld_ss_patterns<!cast<Instruction>(NAME # _PSEUDO_Q),
                                 !if(is_col, int_aarch64_sme_ld1q_vert,
                                             int_aarch64_sme_ld1q_horiz),
-                                imm0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
+                                timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4,
                                 tileslice128>;
 }
 
@@ -539,22 +539,22 @@
   defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _B),
                                 !if(is_col, int_aarch64_sme_st1b_vert,
                                             int_aarch64_sme_st1b_horiz),
-                                imm0_15, imm_to_tile8, am_sve_regreg_lsl0,
+                                timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0,
                                 tileslice8>;
   defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _H),
                                 !if(is_col, int_aarch64_sme_st1h_vert,
                                             int_aarch64_sme_st1h_horiz),
-                                imm0_7, imm_to_tile16, am_sve_regreg_lsl1,
+                                timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1,
                                 tileslice16>;
   defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _S),
                                 !if(is_col, int_aarch64_sme_st1w_vert,
                                             int_aarch64_sme_st1w_horiz),
-                                imm0_3, imm_to_tile32, am_sve_regreg_lsl2,
+                                timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2,
                                 tileslice32>;
   defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _D),
                                 !if(is_col, int_aarch64_sme_st1d_vert,
                                             int_aarch64_sme_st1d_horiz),
-                                imm0_1, imm_to_tile64, am_sve_regreg_lsl3,
+                                timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3,
                                 tileslice64>;
   defm : sme_mem_st_ss_patterns<!cast<Instruction>(NAME # _Q),
                                 !if(is_col, int_aarch64_sme_st1q_vert,
@@ -700,8 +700,8 @@
 }
 
 class sme_mova_insert_pseudo
-    : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx,
-                      i64imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
+    : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx,
+                      i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>,
       Sched<[]> {
   // Translated to the actual instructions in AArch64ISelLowering.cpp
   let usesCustomInserter = 1;
@@ -779,28 +779,28 @@
                    int_aarch64_sme_write_horiz);
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_B),
-                                     nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15,
+                                     nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15,
                                      op, tileslice8>;
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
-                                     nxv8i16, nxv8i1, sme_elm_idx0_1, imm0_7,
+                                     nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
                                      op, tileslice16>;
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
-                                     nxv8f16, nxv8i1, sme_elm_idx0_1, imm0_7,
+                                     nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
                                      op, tileslice16>;
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_H),
-                                     nxv8bf16, nxv8i1, sme_elm_idx0_1, imm0_7,
+                                     nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7,
                                      op, tileslice16>;
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
-                                     nxv4i32, nxv4i1, sme_elm_idx0_3, imm0_3,
+                                     nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
                                      op, tileslice32>;
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_S),
-                                     nxv4f32, nxv4i1, sme_elm_idx0_3, imm0_3,
+                                     nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3,
                                      op, tileslice32>;
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
-                                     nxv2i64, nxv2i1, sme_elm_idx0_7, imm0_1,
+                                     nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
                                      op, tileslice64>;
   defm : sme_vector_to_tile_patterns<!cast<Instruction>(NAME # _PSEUDO_D),
-                                     nxv2f64, nxv2i1, sme_elm_idx0_7, imm0_1,
+                                     nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1,
                                      op, tileslice64>;
 
   defvar opq = !if(is_col, int_aarch64_sme_writeq_vert,
@@ -946,28 +946,28 @@
                    int_aarch64_sme_read_horiz);
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _B),
-                                     nxv16i8, nxv16i1, imm0_15,
+                                     nxv16i8, nxv16i1, sme_elm_idx0_15,
                                      imm_to_tile8, tileslice8, op>;
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
-                                     nxv8i16, nxv8i1, imm0_7,
+                                     nxv8i16, nxv8i1, sme_elm_idx0_7,
                                      imm_to_tile16, tileslice16, op>;
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
-                                     nxv8f16, nxv8i1, imm0_7,
+                                     nxv8f16, nxv8i1, sme_elm_idx0_7,
                                      imm_to_tile16, tileslice16, op>;
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _H),
-                                     nxv8bf16, nxv8i1, imm0_7,
+                                     nxv8bf16, nxv8i1, sme_elm_idx0_7,
                                      imm_to_tile16, tileslice16, op>;
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
-                                     nxv4i32, nxv4i1, imm0_3,
+                                     nxv4i32, nxv4i1, sme_elm_idx0_3,
                                      imm_to_tile32, tileslice32, op>;
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _S),
-                                     nxv4f32, nxv4i1, imm0_3,
+                                     nxv4f32, nxv4i1, sme_elm_idx0_3,
                                      imm_to_tile32, tileslice32, op>;
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
-                                     nxv2i64, nxv2i1, imm0_1,
+                                     nxv2i64, nxv2i1, sme_elm_idx0_1,
                                      imm_to_tile64, tileslice64, op>;
   defm : sme_tile_to_vector_patterns<!cast<Instruction>(NAME # _D),
-                                     nxv2f64, nxv2i1, imm0_1,
+                                     nxv2f64, nxv2i1, sme_elm_idx0_1,
                                      imm_to_tile64, tileslice64, op>;
 
   defvar opq = !if(is_col, int_aarch64_sme_readq_vert,
@@ -1038,14 +1038,14 @@
 def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>;
 def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>;
 
-  def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>,
+  def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>,
                        Sched<[]> {
     // Translated to the actual instructions in AArch64ISelLowering.cpp
     let usesCustomInserter = 1;
   }
 
-  def : Pat<(int_aarch64_sme_zero imm:$imm),
-            (!cast<Instruction>(NAME # _PSEUDO) imm:$imm)>;
+  def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm),
+            (!cast<Instruction>(NAME # _PSEUDO) timm32_0_255:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
@@ -6,7 +6,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: addha za0.s, p0/m, p1/m, z0.s
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+  call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
   ret void
 }
 
@@ -15,7 +15,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: addva za3.s, p0/m, p1/m, z0.s
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+  call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
   ret void
 }
 
@@ -24,7 +24,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: addha za0.d, p0/m, p1/m, z0.d
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+  call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
   ret void
 }
 
@@ -33,11 +33,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: addva za7.d, p0/m, p1/m, z0.d
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+  call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
   ret void
 }
 
-declare void @llvm.aarch64.sme.addha.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.addha.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.addva.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.addva.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.addha.nxv4i32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addha.nxv2i64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.addva.nxv4i32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addva.nxv2i64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll
@@ -10,8 +10,8 @@
 ; CHECK-NEXT: ld1b {za0v.b[w13, 0]}, p0/z, [x0]
 ; CHECK-NEXT: ret
   %tileslice = add i32 %sliceidx, 15
-  call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
-  call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+  call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 0)
   ret void;
 }
 
@@ -25,8 +25,8 @@
 ; CHECK-NEXT: ret
   %base = getelementptr i8, ptr %ptr, i64 %index
   %tileslice = add i32 %sliceidx, 15
-  call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
   ret void;
 }
 
@@ -41,10 +41,10 @@
 ; CHECK-NEXT: ld1h {za1v.h[w12, 7]}, p0/z, [x0]
 ; CHECK-NEXT: ret
   %tileslice = add i32 %sliceidx, 7
-  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
-  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 0)
-  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 0)
+  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 %tileslice)
   ret void;
 }
 
@@ -58,8 +58,8 @@
 ; CHECK-NEXT: ret
   %base = getelementptr i16, ptr %ptr, i64 %index
   %tileslice = add i32 %sliceidx, 7
-  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
-  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %base, i64 1, i32 0)
+  call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> %pg, ptr %base, i32 1, i32 0)
   ret void;
 }
 
@@ -78,14 +78,14 @@
 ; CHECK-NEXT: ld1w {za3v.s[w12, 0]}, p0/z, [x0]
 ; CHECK-NEXT: ret
   %tileslice = add i32 %sliceidx, 3
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 0)
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 %tileslice)
-  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
-  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 %tileslice)
-  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 0)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 0)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 0)
   ret void;
 }
 
@@ -99,8 +99,8 @@
 ; CHECK-NEXT: ret
   %base = getelementptr i32, ptr %ptr, i64 %index
   %tileslice = add i32 %sliceidx, 3
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %base, i64 3, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> %pg, ptr %base, i32 3, i32 %tileslice)
   ret void;
 }
 
@@ -127,22 +127,22 @@
 ; CHECK-NEXT: ld1d {za7v.d[w12, 1]}, p0/z, [x0]
 ; CHECK-NEXT: ret
   %tileslice = add i32 %sliceidx, 1
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 %tileslice)
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 %tileslice)
   ret void;
 }
 
@@ -156,8 +156,8 @@
 ; CHECK-NEXT: ret
   %base = getelementptr i64, ptr %ptr, i64 %index
   %tileslice = add i32 %sliceidx, 1
-  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
-  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %base, i64 7, i32 0)
+  call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+  call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> %pg, ptr %base, i32 7, i32 0)
   ret void;
 }
 
@@ -198,38 +198,38 @@
 ; CHECK-NEXT: ld1q {za14v.q[w12, 0]}, p0/z, [x0]
 ; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0]
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
   ret void;
 }
 
@@ -241,8 +241,8 @@
 ; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0, x1, lsl #4]
 ; CHECK-NEXT: ret
   %base = getelementptr i128, ptr %ptr, i64 %index
-  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i64 0, i32 0)
-  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %base, i64 15, i32 0)
+  call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i32 0, i32 0)
+  call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> %pg, ptr %base, i32 15, i32 0)
   ret void;
 }
 
@@ -317,9 +317,9 @@
 for.body:
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %base)
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add1)
-  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add2)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %base)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add1)
+  call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add2)
   %inc = add nuw nsw i32 %i, 1
   %exitcond.not = icmp eq i32 %inc, %N
   br i1 %exitcond.not, label %exit, label %for.body
@@ -329,16 +329,16 @@
 }
 
-declare void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
 
 declare void @llvm.aarch64.sme.ldr(i32, ptr)
 declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll
@@ -6,7 +6,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmopa za0.s, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
   ret void
 }
 
@@ -15,7 +15,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmopa za1.s, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
   ret void
 }
 
@@ -24,7 +24,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: smopa za2.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -33,7 +33,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: smopa za0.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
@@ -42,7 +42,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: umopa za3.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -51,7 +51,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: umopa za1.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
@@ -60,7 +60,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmopa za0.s, p0/m, p1/m, z0.s, z1.s
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mopa.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
   ret void
 }
 
@@ -69,7 +69,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmopa za2.d, p0/m, p1/m, z0.d, z1.d
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mopa.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  call void @llvm.aarch64.sme.mopa.nxv2f64(i32 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
   ret void
 }
 
@@ -78,7 +78,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sumopa za1.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -87,7 +87,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sumopa za3.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
@@ -96,7 +96,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: usmopa za2.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -105,22 +105,22 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: usmopa za7.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
 attributes #0 = { "target-features"="+sme-i16i64" }
 attributes #1 = { "target-features"="+sme-f64f64" }
 
-declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.mopa.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.mopa.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.mopa.nxv4f32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.mopa.nxv2f64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll
@@ -6,7 +6,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bfmops za0.s, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
   ret void
 }
 
@@ -15,7 +15,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmops za1.s, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mops.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
   ret void
 }
 
@@ -24,7 +24,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: smops za2.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.smops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -33,7 +33,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: smops za0.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.smops.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
@@ -42,7 +42,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: umops za3.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.umops.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -51,7 +51,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: umops za1.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.umops.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
@@ -60,7 +60,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmops za0.s, p0/m, p1/m, z0.s, z1.s
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mops.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  call void @llvm.aarch64.sme.mops.nxv4f32(i32 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
   ret void
 }
 
@@ -69,7 +69,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmops za2.d, p0/m, p1/m, z0.d, z1.d
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.mops.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  call void @llvm.aarch64.sme.mops.nxv2f64(i32 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
   ret void
 }
 
@@ -78,7 +78,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sumops za1.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -87,7 +87,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sumops za3.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
@@ -96,7 +96,7 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: usmops za2.s, p0/m, p1/m, z0.b, z1.b
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
   ret void
 }
 
@@ -105,22 +105,22 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: usmops za7.d, p0/m, p1/m, z0.h, z1.h
 ; CHECK-NEXT: ret
-  call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
   ret void
 }
 
 attributes #0 = { "target-features"="+sme-i16i64" }
 attributes #1 = { "target-features"="+sme-f64f64" }
 
-declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.mops.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.mops.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.mops.nxv4f32(i32, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.mops.nxv2f64(i32, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll
@@ -22,21 +22,21 @@
 ; CHECK-NEXT: mov z0.b, p0/m, za0h.b[w12, 14]
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
-  %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice)
+  %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice)
   %tileslice.2 = add i32 %tileslice, 2
-  %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.2)
+  %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.2)
   %tileslice.4 = add i32 %tileslice, 4
-  %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.4)
+  %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.4)
   %tileslice.6 = add i32 %tileslice, 6
-  %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.6)
+  %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.6)
   %tileslice.8 = add i32 %tileslice, 8
-  %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.8)
+  %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.8)
   %tileslice.10 = add i32 %tileslice, 10
-  %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.10)
+  %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.10)
   %tileslice.12 = add i32 %tileslice, 12
-  %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.12)
+  %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.12)
   %tileslice.14 = add i32 %tileslice, 14
-  %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.14)
+  %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.14)
   ret <vscale x 16 x i8> %z0
 }
 
@@ -62,21 +62,21 @@
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
   %tileslice.1 = add i32 %tileslice, 1
-  %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.1)
+  %z0 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.1)
   %tileslice.3 = add i32 %tileslice, 3
-  %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.3)
+  %z1 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.3)
   %tileslice.5 = add i32 %tileslice, 5
-  %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.5)
+  %z2 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.5)
   %tileslice.7 = add i32 %tileslice, 7
-  %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.7)
+  %z3 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.7)
   %tileslice.9 = add i32 %tileslice, 9
-  %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.9)
+  %z4 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.9)
   %tileslice.11 = add i32 %tileslice, 11
-  %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.11)
+  %z5 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.11)
   %tileslice.13 = add i32 %tileslice, 13
-  %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.13)
+  %z6 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.13)
   %tileslice.15 = add i32 %tileslice, 15
-  %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i64 0, i32 %tileslice.15)
+  %z7 = call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> %zd, <vscale x 16 x i1> %pg, i32 0, i32 %tileslice.15)
   ret <vscale x 16 x i8> %z0
 }
 
@@ -93,13 +93,13 @@
 ; CHECK-NEXT: mov z0.h, p0/m, za0h.h[w12, 6]
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
-  %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice)
+  %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
   %tileslice.2 = add i32 %tileslice, 2
-  %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.2)
+  %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.2)
   %tileslice.4 = add i32 %tileslice, 4
-  %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.4)
+  %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.4)
   %tileslice.6 = add i32 %tileslice, 6
-  %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.6)
+  %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.6)
   ret <vscale x 8 x i16> %z0
 }
 
@@ -117,13 +117,13 @@
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
   %tileslice.1 = add i32 %tileslice, 1
-  %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.1)
+  %z0 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.1)
   %tileslice.3 = add i32 %tileslice, 3
-  %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.3)
+  %z1 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.3)
   %tileslice.5 = add i32 %tileslice, 5
-  %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.5)
+  %z2 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.5)
   %tileslice.7 = add i32 %tileslice, 7
-  %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i64 1, i32 %tileslice.7)
+  %z3 = call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> %zd, <vscale x 8 x i1> %pg, i32 1, i32 %tileslice.7)
   ret <vscale x 8 x i16> %z0
 }
 
@@ -148,21 +148,21 @@
 ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7]
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
-  %z0 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice)
+  %z0 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
   %tileslice.1 = add i32 %tileslice, 1
-  %z1 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.1)
+  %z1 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.1)
   %tileslice.2 = add i32 %tileslice, 2
-  %z2 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.2)
+  %z2 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.2)
   %tileslice.3 = add i32 %tileslice, 3
-  %z3 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.3)
+  %z3 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.3)
   %tileslice.4 = add i32 %tileslice, 4
-  %z4 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.4)
+  %z4 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.4)
   %tileslice.5 = add i32 %tileslice, 5
-  %z5 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.5)
+  %z5 = call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.5)
   %tileslice.6 = add i32 %tileslice, 6
-  %z6 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.6)
+  %z6 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.6)
   %tileslice.7 = add i32 %tileslice, 7
-  %z7 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.7)
+  %z7 = call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.7)
   ret <vscale x 8 x half> %z0
 }
 
@@ -187,21 +187,21 @@
 ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7]
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
-  %z0 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice)
+  %z0 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice)
   %tileslice.1 = add i32 %tileslice, 1
-  %z1 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.1)
+  %z1 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.1)
   %tileslice.2 = add i32 %tileslice, 2
-  %z2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.2)
+  %z2 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.2)
   %tileslice.3 = add i32 %tileslice, 3
-  %z3 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.3)
+  %z3 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.3)
   %tileslice.4 = add i32 %tileslice, 4
-  %z4 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.4)
+  %z4 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.4)
   %tileslice.5 = add i32 %tileslice, 5
-  %z5 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.5)
+  %z5 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.5)
   %tileslice.6 = add i32 %tileslice, 6
-  %z6 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.6)
+  %z6 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.6)
   %tileslice.7 = add i32 %tileslice, 7
-  %z7 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i64 0, i32 %tileslice.7)
+  %z7 = call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> %zd, <vscale x 8 x i1> %pg, i32 0, i32 %tileslice.7)
   ret <vscale x 8 x bfloat> %z0
 }
 
@@ -214,9 +214,9 @@
 ; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 2]
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
-  %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice)
+  %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice)
   %tileslice.2 = add i32 %tileslice, 2
-  %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.2)
+  %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.2)
   ret <vscale x 4 x i32> %z0
 }
 
@@ -230,9 +230,9 @@
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
   %tileslice.1 = add i32 %tileslice, 1
-  %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 3, i32 %tileslice.1)
+  %z0 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 3, i32 %tileslice.1)
   %tileslice.3 = add i32 %tileslice, 3
-  %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i64 3, i32 %tileslice.3)
+  %z1 = call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> %zd, <vscale x 4 x i1> %pg, i32 3, i32 %tileslice.3)
   ret <vscale x 4 x i32> %z0
 }
 
@@ -249,13 +249,13 @@
 ; CHECK-NEXT: mov z0.s, p0/m, za0v.s[w12, 3]
 ; CHECK-NEXT: mov z0.d, z1.d
 ; CHECK-NEXT: ret
-  %z0 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice)
+  %z0 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice)
   %tileslice.1 = add i32 %tileslice, 1
-  %z1 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.1)
+  %z1 = call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.1)
   %tileslice.2 = add i32 %tileslice, 2
-  %z2 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.2)
+  %z2 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.2)
   %tileslice.3 = add i32 %tileslice, 3
-  %z3 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i64 0, i32 %tileslice.3)
+  %z3 = call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> %zd, <vscale x 4 x i1> %pg, i32 0, i32 %tileslice.3)
   ret <vscale x 4 x float> %z0
 }
 
@@ -265,7 +265,7 @@
 ; CHECK-NEXT: mov w12, w0
 ; CHECK-NEXT: mov z0.d, p0/m, za0h.d[w12, 0]
 ; CHECK-NEXT: ret
-  %z0 = call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i64 0, i32 %tileslice)
+  %z0 = call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i32 0, i32 %tileslice)
   ret <vscale x 2 x i64> %z0
 }
 
@@ -276,7 +276,7 @@
 ; CHECK-NEXT: mov z0.d, p0/m, za1v.d[w12, 1]
 ; CHECK-NEXT: ret
   %tileslice.1 = add i32 %tileslice, 1
-  %z0 = call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> %zd, <vscale x 2 x i1> %pg, i64 1, i32 %tileslice.1)
%tileslice.1) + %z0 = call @llvm.aarch64.sme.read.vert.nxv2i64( %zd, %pg, i32 1, i32 %tileslice.1) ret %z0 } @@ -289,9 +289,9 @@ ; CHECK-NEXT: mov z0.d, p0/m, za0v.d[w12, 1] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv2f64( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv2f64( %zd, %pg, i32 0, i32 %tileslice) %tileslice.1 = add i32 %tileslice, 1 - %z1 = call @llvm.aarch64.sme.read.vert.nxv2f64( %zd, %pg, i64 0, i32 %tileslice.1) + %z1 = call @llvm.aarch64.sme.read.vert.nxv2f64( %zd, %pg, i32 0, i32 %tileslice.1) ret %z0 } @@ -301,7 +301,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv16i8( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv16i8( %zd, %pg, i32 0, i32 0) ret %res } @@ -311,7 +311,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv8i16( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv8i16( %zd, %pg, i32 0, i32 0) ret %res } @@ -321,7 +321,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv8f16( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv8f16( %zd, %pg, i32 0, i32 0) ret %res } @@ -331,7 +331,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv4i32( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv4i32( %zd, %pg, i32 0, i32 0) ret %res } @@ -341,7 +341,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv4f32( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv4f32( %zd, %pg, i32 0, i32 0) ret %res } @@ -351,7 +351,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv2i64( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv2i64( %zd, %pg, i32 0, i32 0) ret %res } @@ -361,7 +361,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv2f64( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv2f64( %zd, %pg, i32 0, i32 0) ret %res } @@ -371,7 +371,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv16i8( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv16i8( %zd, %pg, i32 15, i32 0) ret %res } @@ -381,7 +381,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv8i16( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv8i16( %zd, %pg, i32 15, i32 0) ret %res } @@ -391,7 +391,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv8f16( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv8f16( %zd, %pg, i32 15, i32 0) ret %res } @@ -401,7 +401,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv4i32( %zd, %pg, i64 15, i32 0) + %res 
= call @llvm.aarch64.sme.readq.vert.nxv4i32( %zd, %pg, i32 15, i32 0) ret %res } @@ -411,7 +411,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv4f32( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv4f32( %zd, %pg, i32 15, i32 0) ret %res } @@ -421,7 +421,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv2i64( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv2i64( %zd, %pg, i32 15, i32 0) ret %res } @@ -431,7 +431,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv2f64( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv2f64( %zd, %pg, i32 15, i32 0) ret %res } @@ -461,9 +461,9 @@ for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %z0 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i64 0, i32 %base) - %z1 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i64 0, i32 %add1) - %z2 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i64 0, i32 %add2) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i32 0, i32 %base) + %z1 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i32 0, i32 %add1) + %z2 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i32 0, i32 %add2) %inc = add nuw nsw i32 %i, 3 %exitcond.not = icmp eq i32 %inc, %N br i1 %exitcond.not, label %exit, label %for.body @@ -474,36 +474,36 @@ ret %res } -declare @llvm.aarch64.sme.read.horiz.nxv16i8(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv8i16(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv8f16(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv8bf16(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv4i32(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv4f32(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv2i64(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv2f64(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv16i8(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv8i16(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv8f16(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv8bf16(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv4i32(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv4f32(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv2i64(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv2f64(, , i64, i32) +declare @llvm.aarch64.sme.read.horiz.nxv16i8(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv8i16(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv8f16(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv8bf16(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv4i32(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv4f32(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv2i64(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv2f64(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv16i8(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv8i16(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv8f16(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv8bf16(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv4i32(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv4f32(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv2i64(, , i32, i32) +declare 
+declare <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
-declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i64, i32)
-declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i64, i32)
-declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i64, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i64, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64, i32)
-declare <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i64, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i32, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32, i32)
+declare <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32, i32)
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll
@@ -18,21 +18,21 @@
<vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5, <vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.2, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.2, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.4, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.4, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.6, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.6, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
%tileslice.8 = add i32 %tileslice, 8
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.8, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.8, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
%tileslice.10 = add i32 %tileslice, 10
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.10, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.10, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
%tileslice.12 = add i32 %tileslice, 12
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.12, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.12, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
%tileslice.14 = add i32 %tileslice, 14
- call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.14, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
+ call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.14, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
ret void
}
@@ -54,21 +54,21 @@
<vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5, <vscale x 16 x i8> %z6, <vscale x 16 x i8> %z7) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.1, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z0)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.3, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.3, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z1)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.5, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.5, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z2)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.7, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.7, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z3)
%tileslice.9 = add i32 %tileslice, 9
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.9, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.9, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z4)
%tileslice.11 = add i32 %tileslice, 11
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.11, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.11, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z5)
%tileslice.13 = add i32 %tileslice, 13
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.13, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.13, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z6)
%tileslice.15 = add i32 %tileslice, 15
- call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.15, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.15, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %z7)
ret void
}
@@ -85,13 +85,13 @@
<vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5, <vscale x 8 x i16> %z6, <vscale x 8 x i16> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z0)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z2)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z2)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z4)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z6)
+ call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z6)
ret void
}
@@ -109,13 +109,13 @@
<vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5, <vscale x 8 x i16> %z6, <vscale x 8 x i16> %z7) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z1)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z3)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z5)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z5)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %z7)
ret void
}
@@ -136,21 +136,21 @@
<vscale x 8 x half> %z2, <vscale x 8 x half> %z3, <vscale x 8 x half> %z4, <vscale x 8 x half> %z5, <vscale x 8 x half> %z6, <vscale x 8 x half> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z1)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z2)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z3)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z4)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z5)
+ call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z5)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z6)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z6)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x half> %z7)
ret void
}
@@ -171,21 +171,21 @@
<vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3, <vscale x 8 x bfloat> %z4, <vscale x 8 x bfloat> %z5, <vscale x 8 x bfloat> %z6, <vscale x 8 x bfloat> %z7) {
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.1, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z1)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.2, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z2)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.3, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z3)
%tileslice.4 = add i32 %tileslice, 4
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z4)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.4, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z4)
%tileslice.5 = add i32 %tileslice, 5
- call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z5)
+ call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.5, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z5)
%tileslice.6 = add i32 %tileslice, 6
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z6)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.6, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z6)
%tileslice.7 = add i32 %tileslice, 7
- call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z7)
+ call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.7, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %z7)
ret void
}
@@ -198,9 +198,9 @@
; CHECK-NEXT: ret
<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) {
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z0)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z2)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z2)
ret void
}
@@ -214,9 +214,9 @@
<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z1)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %z3)
ret void
}
@@ -231,13 +231,13 @@
; CHECK-NEXT: ret
<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) {
- call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 %tileslice, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z1)
+ call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 %tileslice.1, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z1)
%tileslice.2 = add i32 %tileslice, 2
- call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z2)
+ call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 %tileslice.2, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z2)
%tileslice.3 = add i32 %tileslice, 3
- call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z3)
+ call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 %tileslice.3, <vscale x 4 x i1> %pg, <vscale x 4 x float> %z3)
ret void
}
@@ -248,7 +248,7 @@
; CHECK-NEXT: mov za0h.d[w12, 0], p0/m, z0.d
; CHECK-NEXT: ret
<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) {
- call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z0)
ret void
}
@@ -260,7 +260,7 @@
<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) {
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 7, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %z1)
ret void
}
@@ -272,9 +272,9 @@
; CHECK-NEXT: mov za0v.d[w12, 1], p0/m, z1.d
; CHECK-NEXT: ret
<vscale x 2 x double> %z0, <vscale x 2 x double> %z1) {
- call void @llvm.aarch64.sme.write.horiz.nxv2f64(i64 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z0)
+ call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 %tileslice, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z0)
%tileslice.1 = add i32 %tileslice, 1
- call void @llvm.aarch64.sme.write.vert.nxv2f64(i64 0, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z1)
+ call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 %tileslice.1, <vscale x 2 x i1> %pg, <vscale x 2 x double> %z1)
ret void
}
@@ -284,7 +284,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 0, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
ret void
}
@@ -294,7 +294,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
ret void
}
@@ -304,7 +304,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
ret void
}
@@ -314,7 +314,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
ret void
}
@@ -324,7 +324,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
ret void
}
@@ -334,7 +334,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
ret void
}
@@ -344,7 +344,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
ret void
}
@@ -354,7 +354,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
+ call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
ret void
}
@@ -364,7 +364,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 15, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 0, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %zn)
ret void
}
@@ -374,7 +374,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %zn)
ret void
}
@@ -384,7 +384,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x half> %zn)
ret void
}
@@ -394,7 +394,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 0, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %zn)
ret void
}
@@ -404,7 +404,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %zn)
ret void
}
@@ -414,7 +414,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 0, <vscale x 4 x i1> %pg, <vscale x 4 x float> %zn)
ret void
}
@@ -424,7 +424,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %zn)
ret void
}
@@ -434,7 +434,7 @@
; CHECK-NEXT: mov w12, wzr
; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
+ call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 0, <vscale x 2 x i1> %pg, <vscale x 2 x double> %zn)
ret void
}
@@ -459,9 +459,9 @@
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %base, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %add1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
- call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %add2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %base, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %add1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
+ call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %add2, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
%inc = add nuw nsw i32 %i, 3
%exitcond.not = icmp eq i32 %inc, %N
br i1 %exitcond.not, label %exit, label %for.body
@@ -470,36 +470,36 @@
ret void
}
-declare void @llvm.aarch64.sme.write.horiz.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.write.horiz.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.write.horiz.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.write.horiz.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.write.horiz.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.write.horiz.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.write.horiz.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.write.vert.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.write.vert.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.write.vert.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.write.vert.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.write.vert.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.write.vert.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.write.vert.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.write.vert.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-
-declare void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.write.horiz.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.write.horiz.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.write.horiz.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.write.horiz.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.write.horiz.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.write.horiz.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.write.horiz.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.write.vert.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.write.vert.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.write.vert.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.write.vert.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.write.vert.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.write.vert.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.write.vert.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.write.vert.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll
@@ -10,8 +10,8 @@
; CHECK-NEXT: st1b {za0v.b[w13, 0]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i64 0, i32 0)
+ call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %ptr, i32 0, i32 0)
ret void;
}
@@ -25,8 +25,8 @@
%base = getelementptr i8, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 15
- call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
ret void;
}
@@ -41,10 +41,10 @@
; CHECK-NEXT: st1h {za1v.h[w12, 7]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i64 1, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %ptr, i32 1, i32 %tileslice)
ret void;
}
@@ -58,8 +58,8 @@
%base = getelementptr i16, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 7
- call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %base, i64 1, i32 0)
+ call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> %pg, ptr %base, i32 1, i32 0)
ret void;
}
@@ -78,14 +78,14 @@
; CHECK-NEXT: st1w {za3v.s[w13, 0]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 %tileslice)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 2, i32 %tileslice)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i64 3, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 2, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %ptr, i32 3, i32 0)
ret void;
}
@@ -99,8 +99,8 @@
%base = getelementptr i32, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 3
- call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %base, i64 3, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> %pg, ptr %base, i32 3, i32 %tileslice)
ret void;
}
@@ -127,22 +127,22 @@
; CHECK-NEXT: st1d {za7v.d[w12, 1]}, p0, [x0]
; CHECK-NEXT: ret
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 %tileslice)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i64 7, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %ptr, i32 7, i32 %tileslice)
ret void;
}
@@ -156,8 +156,8 @@
%base = getelementptr i64, ptr %ptr, i64 %index
%tileslice = add i32 %sliceidx, 1
- call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i64 0, i32 %tileslice)
- call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %base, i64 7, i32 0)
+ call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> %pg, ptr %base, i32 0, i32 %tileslice)
+ call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> %pg, ptr %base, i32 7, i32 0)
ret void;
}
@@ -198,38 +198,38 @@
; CHECK-NEXT: st1q {za14v.q[w12, 0]}, p0, [x0]
; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0]
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 1, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 2, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 3, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 4, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 5, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 6, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 7, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 8, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 9, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 10, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 11, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 12, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 13, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 14, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i64 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 1, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 2, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 3, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 4, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 5, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 6, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 7, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 8, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 9, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 10, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 11, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 12, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 13, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 14, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %ptr, i32 15, i32 0)
ret void;
}
@@ -241,8 +241,8 @@
; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0, x1, lsl #4]
; CHECK-NEXT: ret
%base = getelementptr i128, ptr %ptr, i64 %index
- call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i64 0, i32 0)
- call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %base, i64 15, i32 0)
+ call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> %pg, ptr %base, i32 0, i32 0)
+ call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> %pg, ptr %base, i32 15, i32 0)
ret void;
}
@@ -317,9 +317,9 @@
for.body:
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %base)
- tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add0)
- tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i64 0, i32 %add1)
+ tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %base)
+ tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add0)
+ tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %src, i32 0, i32 %add1)
%inc = add nuw nsw i32 %i, 1
%exitcond.not = icmp eq i32 %inc, %N
br i1 %exitcond.not, label %exit, label %for.body
@@ -328,16 +328,16 @@
ret void
}
-declare void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1>, ptr, i64, i32)
-declare void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1>, ptr, i64, i32)
+declare void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1>, ptr, i32, i32)
+declare void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1>, ptr, i32, i32)
declare void @llvm.aarch64.sme.str(i32, ptr)
declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll
@@ -262,263 +262,263 @@
; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za5.d, za6.d, za7.d}
; CHECK-NEXT: zero {za}
; CHECK-NEXT: ret
- call void @llvm.aarch64.sme.zero(i64 0)
- call void @llvm.aarch64.sme.zero(i64 1)
- call void @llvm.aarch64.sme.zero(i64 2)
- call void @llvm.aarch64.sme.zero(i64 3)
- call void @llvm.aarch64.sme.zero(i64 4)
- call void @llvm.aarch64.sme.zero(i64 5)
- call void @llvm.aarch64.sme.zero(i64 6)
- call void @llvm.aarch64.sme.zero(i64 7)
- call void @llvm.aarch64.sme.zero(i64 8)
- call void @llvm.aarch64.sme.zero(i64 9)
- call void @llvm.aarch64.sme.zero(i64 10)
- call void @llvm.aarch64.sme.zero(i64 11)
- call void @llvm.aarch64.sme.zero(i64 12)
- call void @llvm.aarch64.sme.zero(i64 13)
- call void @llvm.aarch64.sme.zero(i64 14)
- call void @llvm.aarch64.sme.zero(i64 15)
- call void @llvm.aarch64.sme.zero(i64 16)
- call void @llvm.aarch64.sme.zero(i64 17)
- call void @llvm.aarch64.sme.zero(i64 18)
- call void @llvm.aarch64.sme.zero(i64 19)
- call void @llvm.aarch64.sme.zero(i64 20)
- call void @llvm.aarch64.sme.zero(i64 21)
- call void @llvm.aarch64.sme.zero(i64 22)
- call void @llvm.aarch64.sme.zero(i64 23)
- call void @llvm.aarch64.sme.zero(i64 24)
- call void @llvm.aarch64.sme.zero(i64 25)
- call void @llvm.aarch64.sme.zero(i64 26)
- call void @llvm.aarch64.sme.zero(i64 27)
- call void @llvm.aarch64.sme.zero(i64 28)
- call void @llvm.aarch64.sme.zero(i64 29)
- call void @llvm.aarch64.sme.zero(i64 30)
- call void @llvm.aarch64.sme.zero(i64 31)
- call void @llvm.aarch64.sme.zero(i64 32)
- call void @llvm.aarch64.sme.zero(i64 33)
- call void @llvm.aarch64.sme.zero(i64 34)
- call void @llvm.aarch64.sme.zero(i64 35)
- call void @llvm.aarch64.sme.zero(i64 36)
- call void @llvm.aarch64.sme.zero(i64 37)
- call void @llvm.aarch64.sme.zero(i64 38)
- call void @llvm.aarch64.sme.zero(i64 39)
- call void @llvm.aarch64.sme.zero(i64 40)
- call void @llvm.aarch64.sme.zero(i64 41)
- call void @llvm.aarch64.sme.zero(i64 42)
- call void
@llvm.aarch64.sme.zero(i64 43) - call void @llvm.aarch64.sme.zero(i64 44) - call void @llvm.aarch64.sme.zero(i64 45) - call void @llvm.aarch64.sme.zero(i64 46) - call void @llvm.aarch64.sme.zero(i64 47) - call void @llvm.aarch64.sme.zero(i64 48) - call void @llvm.aarch64.sme.zero(i64 49) - call void @llvm.aarch64.sme.zero(i64 50) - call void @llvm.aarch64.sme.zero(i64 51) - call void @llvm.aarch64.sme.zero(i64 52) - call void @llvm.aarch64.sme.zero(i64 53) - call void @llvm.aarch64.sme.zero(i64 54) - call void @llvm.aarch64.sme.zero(i64 55) - call void @llvm.aarch64.sme.zero(i64 56) - call void @llvm.aarch64.sme.zero(i64 57) - call void @llvm.aarch64.sme.zero(i64 58) - call void @llvm.aarch64.sme.zero(i64 59) - call void @llvm.aarch64.sme.zero(i64 60) - call void @llvm.aarch64.sme.zero(i64 61) - call void @llvm.aarch64.sme.zero(i64 62) - call void @llvm.aarch64.sme.zero(i64 63) - call void @llvm.aarch64.sme.zero(i64 64) - call void @llvm.aarch64.sme.zero(i64 65) - call void @llvm.aarch64.sme.zero(i64 66) - call void @llvm.aarch64.sme.zero(i64 67) - call void @llvm.aarch64.sme.zero(i64 68) - call void @llvm.aarch64.sme.zero(i64 69) - call void @llvm.aarch64.sme.zero(i64 70) - call void @llvm.aarch64.sme.zero(i64 71) - call void @llvm.aarch64.sme.zero(i64 72) - call void @llvm.aarch64.sme.zero(i64 73) - call void @llvm.aarch64.sme.zero(i64 74) - call void @llvm.aarch64.sme.zero(i64 75) - call void @llvm.aarch64.sme.zero(i64 76) - call void @llvm.aarch64.sme.zero(i64 77) - call void @llvm.aarch64.sme.zero(i64 78) - call void @llvm.aarch64.sme.zero(i64 79) - call void @llvm.aarch64.sme.zero(i64 80) - call void @llvm.aarch64.sme.zero(i64 81) - call void @llvm.aarch64.sme.zero(i64 82) - call void @llvm.aarch64.sme.zero(i64 83) - call void @llvm.aarch64.sme.zero(i64 84) - call void @llvm.aarch64.sme.zero(i64 85) - call void @llvm.aarch64.sme.zero(i64 86) - call void @llvm.aarch64.sme.zero(i64 87) - call void @llvm.aarch64.sme.zero(i64 88) - call void @llvm.aarch64.sme.zero(i64 89) - call void @llvm.aarch64.sme.zero(i64 90) - call void @llvm.aarch64.sme.zero(i64 91) - call void @llvm.aarch64.sme.zero(i64 92) - call void @llvm.aarch64.sme.zero(i64 93) - call void @llvm.aarch64.sme.zero(i64 94) - call void @llvm.aarch64.sme.zero(i64 95) - call void @llvm.aarch64.sme.zero(i64 96) - call void @llvm.aarch64.sme.zero(i64 97) - call void @llvm.aarch64.sme.zero(i64 98) - call void @llvm.aarch64.sme.zero(i64 99) - call void @llvm.aarch64.sme.zero(i64 100) - call void @llvm.aarch64.sme.zero(i64 101) - call void @llvm.aarch64.sme.zero(i64 102) - call void @llvm.aarch64.sme.zero(i64 103) - call void @llvm.aarch64.sme.zero(i64 104) - call void @llvm.aarch64.sme.zero(i64 105) - call void @llvm.aarch64.sme.zero(i64 106) - call void @llvm.aarch64.sme.zero(i64 107) - call void @llvm.aarch64.sme.zero(i64 108) - call void @llvm.aarch64.sme.zero(i64 109) - call void @llvm.aarch64.sme.zero(i64 110) - call void @llvm.aarch64.sme.zero(i64 111) - call void @llvm.aarch64.sme.zero(i64 112) - call void @llvm.aarch64.sme.zero(i64 113) - call void @llvm.aarch64.sme.zero(i64 114) - call void @llvm.aarch64.sme.zero(i64 115) - call void @llvm.aarch64.sme.zero(i64 116) - call void @llvm.aarch64.sme.zero(i64 117) - call void @llvm.aarch64.sme.zero(i64 118) - call void @llvm.aarch64.sme.zero(i64 119) - call void @llvm.aarch64.sme.zero(i64 120) - call void @llvm.aarch64.sme.zero(i64 121) - call void @llvm.aarch64.sme.zero(i64 122) - call void @llvm.aarch64.sme.zero(i64 123) - call void @llvm.aarch64.sme.zero(i64 124) - call void 
@llvm.aarch64.sme.zero(i64 125) - call void @llvm.aarch64.sme.zero(i64 126) - call void @llvm.aarch64.sme.zero(i64 127) - call void @llvm.aarch64.sme.zero(i64 128) - call void @llvm.aarch64.sme.zero(i64 129) - call void @llvm.aarch64.sme.zero(i64 130) - call void @llvm.aarch64.sme.zero(i64 131) - call void @llvm.aarch64.sme.zero(i64 132) - call void @llvm.aarch64.sme.zero(i64 133) - call void @llvm.aarch64.sme.zero(i64 134) - call void @llvm.aarch64.sme.zero(i64 135) - call void @llvm.aarch64.sme.zero(i64 136) - call void @llvm.aarch64.sme.zero(i64 137) - call void @llvm.aarch64.sme.zero(i64 138) - call void @llvm.aarch64.sme.zero(i64 139) - call void @llvm.aarch64.sme.zero(i64 140) - call void @llvm.aarch64.sme.zero(i64 141) - call void @llvm.aarch64.sme.zero(i64 142) - call void @llvm.aarch64.sme.zero(i64 143) - call void @llvm.aarch64.sme.zero(i64 144) - call void @llvm.aarch64.sme.zero(i64 145) - call void @llvm.aarch64.sme.zero(i64 146) - call void @llvm.aarch64.sme.zero(i64 147) - call void @llvm.aarch64.sme.zero(i64 148) - call void @llvm.aarch64.sme.zero(i64 149) - call void @llvm.aarch64.sme.zero(i64 150) - call void @llvm.aarch64.sme.zero(i64 151) - call void @llvm.aarch64.sme.zero(i64 152) - call void @llvm.aarch64.sme.zero(i64 153) - call void @llvm.aarch64.sme.zero(i64 154) - call void @llvm.aarch64.sme.zero(i64 155) - call void @llvm.aarch64.sme.zero(i64 156) - call void @llvm.aarch64.sme.zero(i64 157) - call void @llvm.aarch64.sme.zero(i64 158) - call void @llvm.aarch64.sme.zero(i64 159) - call void @llvm.aarch64.sme.zero(i64 160) - call void @llvm.aarch64.sme.zero(i64 161) - call void @llvm.aarch64.sme.zero(i64 162) - call void @llvm.aarch64.sme.zero(i64 163) - call void @llvm.aarch64.sme.zero(i64 164) - call void @llvm.aarch64.sme.zero(i64 165) - call void @llvm.aarch64.sme.zero(i64 166) - call void @llvm.aarch64.sme.zero(i64 167) - call void @llvm.aarch64.sme.zero(i64 168) - call void @llvm.aarch64.sme.zero(i64 169) - call void @llvm.aarch64.sme.zero(i64 170) - call void @llvm.aarch64.sme.zero(i64 171) - call void @llvm.aarch64.sme.zero(i64 172) - call void @llvm.aarch64.sme.zero(i64 173) - call void @llvm.aarch64.sme.zero(i64 174) - call void @llvm.aarch64.sme.zero(i64 175) - call void @llvm.aarch64.sme.zero(i64 176) - call void @llvm.aarch64.sme.zero(i64 177) - call void @llvm.aarch64.sme.zero(i64 178) - call void @llvm.aarch64.sme.zero(i64 179) - call void @llvm.aarch64.sme.zero(i64 180) - call void @llvm.aarch64.sme.zero(i64 181) - call void @llvm.aarch64.sme.zero(i64 182) - call void @llvm.aarch64.sme.zero(i64 183) - call void @llvm.aarch64.sme.zero(i64 184) - call void @llvm.aarch64.sme.zero(i64 185) - call void @llvm.aarch64.sme.zero(i64 186) - call void @llvm.aarch64.sme.zero(i64 187) - call void @llvm.aarch64.sme.zero(i64 188) - call void @llvm.aarch64.sme.zero(i64 189) - call void @llvm.aarch64.sme.zero(i64 190) - call void @llvm.aarch64.sme.zero(i64 191) - call void @llvm.aarch64.sme.zero(i64 192) - call void @llvm.aarch64.sme.zero(i64 193) - call void @llvm.aarch64.sme.zero(i64 194) - call void @llvm.aarch64.sme.zero(i64 195) - call void @llvm.aarch64.sme.zero(i64 196) - call void @llvm.aarch64.sme.zero(i64 197) - call void @llvm.aarch64.sme.zero(i64 198) - call void @llvm.aarch64.sme.zero(i64 199) - call void @llvm.aarch64.sme.zero(i64 200) - call void @llvm.aarch64.sme.zero(i64 201) - call void @llvm.aarch64.sme.zero(i64 202) - call void @llvm.aarch64.sme.zero(i64 203) - call void @llvm.aarch64.sme.zero(i64 204) - call void @llvm.aarch64.sme.zero(i64 205) - 
call void @llvm.aarch64.sme.zero(i64 206) - call void @llvm.aarch64.sme.zero(i64 207) - call void @llvm.aarch64.sme.zero(i64 208) - call void @llvm.aarch64.sme.zero(i64 209) - call void @llvm.aarch64.sme.zero(i64 210) - call void @llvm.aarch64.sme.zero(i64 211) - call void @llvm.aarch64.sme.zero(i64 212) - call void @llvm.aarch64.sme.zero(i64 213) - call void @llvm.aarch64.sme.zero(i64 214) - call void @llvm.aarch64.sme.zero(i64 215) - call void @llvm.aarch64.sme.zero(i64 216) - call void @llvm.aarch64.sme.zero(i64 217) - call void @llvm.aarch64.sme.zero(i64 218) - call void @llvm.aarch64.sme.zero(i64 219) - call void @llvm.aarch64.sme.zero(i64 220) - call void @llvm.aarch64.sme.zero(i64 221) - call void @llvm.aarch64.sme.zero(i64 222) - call void @llvm.aarch64.sme.zero(i64 223) - call void @llvm.aarch64.sme.zero(i64 224) - call void @llvm.aarch64.sme.zero(i64 225) - call void @llvm.aarch64.sme.zero(i64 226) - call void @llvm.aarch64.sme.zero(i64 227) - call void @llvm.aarch64.sme.zero(i64 228) - call void @llvm.aarch64.sme.zero(i64 229) - call void @llvm.aarch64.sme.zero(i64 230) - call void @llvm.aarch64.sme.zero(i64 231) - call void @llvm.aarch64.sme.zero(i64 232) - call void @llvm.aarch64.sme.zero(i64 233) - call void @llvm.aarch64.sme.zero(i64 234) - call void @llvm.aarch64.sme.zero(i64 235) - call void @llvm.aarch64.sme.zero(i64 236) - call void @llvm.aarch64.sme.zero(i64 237) - call void @llvm.aarch64.sme.zero(i64 238) - call void @llvm.aarch64.sme.zero(i64 239) - call void @llvm.aarch64.sme.zero(i64 240) - call void @llvm.aarch64.sme.zero(i64 241) - call void @llvm.aarch64.sme.zero(i64 242) - call void @llvm.aarch64.sme.zero(i64 243) - call void @llvm.aarch64.sme.zero(i64 244) - call void @llvm.aarch64.sme.zero(i64 245) - call void @llvm.aarch64.sme.zero(i64 246) - call void @llvm.aarch64.sme.zero(i64 247) - call void @llvm.aarch64.sme.zero(i64 248) - call void @llvm.aarch64.sme.zero(i64 249) - call void @llvm.aarch64.sme.zero(i64 250) - call void @llvm.aarch64.sme.zero(i64 251) - call void @llvm.aarch64.sme.zero(i64 252) - call void @llvm.aarch64.sme.zero(i64 253) - call void @llvm.aarch64.sme.zero(i64 254) - call void @llvm.aarch64.sme.zero(i64 255) + call void @llvm.aarch64.sme.zero(i32 0) + call void @llvm.aarch64.sme.zero(i32 1) + call void @llvm.aarch64.sme.zero(i32 2) + call void @llvm.aarch64.sme.zero(i32 3) + call void @llvm.aarch64.sme.zero(i32 4) + call void @llvm.aarch64.sme.zero(i32 5) + call void @llvm.aarch64.sme.zero(i32 6) + call void @llvm.aarch64.sme.zero(i32 7) + call void @llvm.aarch64.sme.zero(i32 8) + call void @llvm.aarch64.sme.zero(i32 9) + call void @llvm.aarch64.sme.zero(i32 10) + call void @llvm.aarch64.sme.zero(i32 11) + call void @llvm.aarch64.sme.zero(i32 12) + call void @llvm.aarch64.sme.zero(i32 13) + call void @llvm.aarch64.sme.zero(i32 14) + call void @llvm.aarch64.sme.zero(i32 15) + call void @llvm.aarch64.sme.zero(i32 16) + call void @llvm.aarch64.sme.zero(i32 17) + call void @llvm.aarch64.sme.zero(i32 18) + call void @llvm.aarch64.sme.zero(i32 19) + call void @llvm.aarch64.sme.zero(i32 20) + call void @llvm.aarch64.sme.zero(i32 21) + call void @llvm.aarch64.sme.zero(i32 22) + call void @llvm.aarch64.sme.zero(i32 23) + call void @llvm.aarch64.sme.zero(i32 24) + call void @llvm.aarch64.sme.zero(i32 25) + call void @llvm.aarch64.sme.zero(i32 26) + call void @llvm.aarch64.sme.zero(i32 27) + call void @llvm.aarch64.sme.zero(i32 28) + call void @llvm.aarch64.sme.zero(i32 29) + call void @llvm.aarch64.sme.zero(i32 30) + call void 
@llvm.aarch64.sme.zero(i32 31) + call void @llvm.aarch64.sme.zero(i32 32) + call void @llvm.aarch64.sme.zero(i32 33) + call void @llvm.aarch64.sme.zero(i32 34) + call void @llvm.aarch64.sme.zero(i32 35) + call void @llvm.aarch64.sme.zero(i32 36) + call void @llvm.aarch64.sme.zero(i32 37) + call void @llvm.aarch64.sme.zero(i32 38) + call void @llvm.aarch64.sme.zero(i32 39) + call void @llvm.aarch64.sme.zero(i32 40) + call void @llvm.aarch64.sme.zero(i32 41) + call void @llvm.aarch64.sme.zero(i32 42) + call void @llvm.aarch64.sme.zero(i32 43) + call void @llvm.aarch64.sme.zero(i32 44) + call void @llvm.aarch64.sme.zero(i32 45) + call void @llvm.aarch64.sme.zero(i32 46) + call void @llvm.aarch64.sme.zero(i32 47) + call void @llvm.aarch64.sme.zero(i32 48) + call void @llvm.aarch64.sme.zero(i32 49) + call void @llvm.aarch64.sme.zero(i32 50) + call void @llvm.aarch64.sme.zero(i32 51) + call void @llvm.aarch64.sme.zero(i32 52) + call void @llvm.aarch64.sme.zero(i32 53) + call void @llvm.aarch64.sme.zero(i32 54) + call void @llvm.aarch64.sme.zero(i32 55) + call void @llvm.aarch64.sme.zero(i32 56) + call void @llvm.aarch64.sme.zero(i32 57) + call void @llvm.aarch64.sme.zero(i32 58) + call void @llvm.aarch64.sme.zero(i32 59) + call void @llvm.aarch64.sme.zero(i32 60) + call void @llvm.aarch64.sme.zero(i32 61) + call void @llvm.aarch64.sme.zero(i32 62) + call void @llvm.aarch64.sme.zero(i32 63) + call void @llvm.aarch64.sme.zero(i32 64) + call void @llvm.aarch64.sme.zero(i32 65) + call void @llvm.aarch64.sme.zero(i32 66) + call void @llvm.aarch64.sme.zero(i32 67) + call void @llvm.aarch64.sme.zero(i32 68) + call void @llvm.aarch64.sme.zero(i32 69) + call void @llvm.aarch64.sme.zero(i32 70) + call void @llvm.aarch64.sme.zero(i32 71) + call void @llvm.aarch64.sme.zero(i32 72) + call void @llvm.aarch64.sme.zero(i32 73) + call void @llvm.aarch64.sme.zero(i32 74) + call void @llvm.aarch64.sme.zero(i32 75) + call void @llvm.aarch64.sme.zero(i32 76) + call void @llvm.aarch64.sme.zero(i32 77) + call void @llvm.aarch64.sme.zero(i32 78) + call void @llvm.aarch64.sme.zero(i32 79) + call void @llvm.aarch64.sme.zero(i32 80) + call void @llvm.aarch64.sme.zero(i32 81) + call void @llvm.aarch64.sme.zero(i32 82) + call void @llvm.aarch64.sme.zero(i32 83) + call void @llvm.aarch64.sme.zero(i32 84) + call void @llvm.aarch64.sme.zero(i32 85) + call void @llvm.aarch64.sme.zero(i32 86) + call void @llvm.aarch64.sme.zero(i32 87) + call void @llvm.aarch64.sme.zero(i32 88) + call void @llvm.aarch64.sme.zero(i32 89) + call void @llvm.aarch64.sme.zero(i32 90) + call void @llvm.aarch64.sme.zero(i32 91) + call void @llvm.aarch64.sme.zero(i32 92) + call void @llvm.aarch64.sme.zero(i32 93) + call void @llvm.aarch64.sme.zero(i32 94) + call void @llvm.aarch64.sme.zero(i32 95) + call void @llvm.aarch64.sme.zero(i32 96) + call void @llvm.aarch64.sme.zero(i32 97) + call void @llvm.aarch64.sme.zero(i32 98) + call void @llvm.aarch64.sme.zero(i32 99) + call void @llvm.aarch64.sme.zero(i32 100) + call void @llvm.aarch64.sme.zero(i32 101) + call void @llvm.aarch64.sme.zero(i32 102) + call void @llvm.aarch64.sme.zero(i32 103) + call void @llvm.aarch64.sme.zero(i32 104) + call void @llvm.aarch64.sme.zero(i32 105) + call void @llvm.aarch64.sme.zero(i32 106) + call void @llvm.aarch64.sme.zero(i32 107) + call void @llvm.aarch64.sme.zero(i32 108) + call void @llvm.aarch64.sme.zero(i32 109) + call void @llvm.aarch64.sme.zero(i32 110) + call void @llvm.aarch64.sme.zero(i32 111) + call void @llvm.aarch64.sme.zero(i32 112) + call void 
@llvm.aarch64.sme.zero(i32 113) + call void @llvm.aarch64.sme.zero(i32 114) + call void @llvm.aarch64.sme.zero(i32 115) + call void @llvm.aarch64.sme.zero(i32 116) + call void @llvm.aarch64.sme.zero(i32 117) + call void @llvm.aarch64.sme.zero(i32 118) + call void @llvm.aarch64.sme.zero(i32 119) + call void @llvm.aarch64.sme.zero(i32 120) + call void @llvm.aarch64.sme.zero(i32 121) + call void @llvm.aarch64.sme.zero(i32 122) + call void @llvm.aarch64.sme.zero(i32 123) + call void @llvm.aarch64.sme.zero(i32 124) + call void @llvm.aarch64.sme.zero(i32 125) + call void @llvm.aarch64.sme.zero(i32 126) + call void @llvm.aarch64.sme.zero(i32 127) + call void @llvm.aarch64.sme.zero(i32 128) + call void @llvm.aarch64.sme.zero(i32 129) + call void @llvm.aarch64.sme.zero(i32 130) + call void @llvm.aarch64.sme.zero(i32 131) + call void @llvm.aarch64.sme.zero(i32 132) + call void @llvm.aarch64.sme.zero(i32 133) + call void @llvm.aarch64.sme.zero(i32 134) + call void @llvm.aarch64.sme.zero(i32 135) + call void @llvm.aarch64.sme.zero(i32 136) + call void @llvm.aarch64.sme.zero(i32 137) + call void @llvm.aarch64.sme.zero(i32 138) + call void @llvm.aarch64.sme.zero(i32 139) + call void @llvm.aarch64.sme.zero(i32 140) + call void @llvm.aarch64.sme.zero(i32 141) + call void @llvm.aarch64.sme.zero(i32 142) + call void @llvm.aarch64.sme.zero(i32 143) + call void @llvm.aarch64.sme.zero(i32 144) + call void @llvm.aarch64.sme.zero(i32 145) + call void @llvm.aarch64.sme.zero(i32 146) + call void @llvm.aarch64.sme.zero(i32 147) + call void @llvm.aarch64.sme.zero(i32 148) + call void @llvm.aarch64.sme.zero(i32 149) + call void @llvm.aarch64.sme.zero(i32 150) + call void @llvm.aarch64.sme.zero(i32 151) + call void @llvm.aarch64.sme.zero(i32 152) + call void @llvm.aarch64.sme.zero(i32 153) + call void @llvm.aarch64.sme.zero(i32 154) + call void @llvm.aarch64.sme.zero(i32 155) + call void @llvm.aarch64.sme.zero(i32 156) + call void @llvm.aarch64.sme.zero(i32 157) + call void @llvm.aarch64.sme.zero(i32 158) + call void @llvm.aarch64.sme.zero(i32 159) + call void @llvm.aarch64.sme.zero(i32 160) + call void @llvm.aarch64.sme.zero(i32 161) + call void @llvm.aarch64.sme.zero(i32 162) + call void @llvm.aarch64.sme.zero(i32 163) + call void @llvm.aarch64.sme.zero(i32 164) + call void @llvm.aarch64.sme.zero(i32 165) + call void @llvm.aarch64.sme.zero(i32 166) + call void @llvm.aarch64.sme.zero(i32 167) + call void @llvm.aarch64.sme.zero(i32 168) + call void @llvm.aarch64.sme.zero(i32 169) + call void @llvm.aarch64.sme.zero(i32 170) + call void @llvm.aarch64.sme.zero(i32 171) + call void @llvm.aarch64.sme.zero(i32 172) + call void @llvm.aarch64.sme.zero(i32 173) + call void @llvm.aarch64.sme.zero(i32 174) + call void @llvm.aarch64.sme.zero(i32 175) + call void @llvm.aarch64.sme.zero(i32 176) + call void @llvm.aarch64.sme.zero(i32 177) + call void @llvm.aarch64.sme.zero(i32 178) + call void @llvm.aarch64.sme.zero(i32 179) + call void @llvm.aarch64.sme.zero(i32 180) + call void @llvm.aarch64.sme.zero(i32 181) + call void @llvm.aarch64.sme.zero(i32 182) + call void @llvm.aarch64.sme.zero(i32 183) + call void @llvm.aarch64.sme.zero(i32 184) + call void @llvm.aarch64.sme.zero(i32 185) + call void @llvm.aarch64.sme.zero(i32 186) + call void @llvm.aarch64.sme.zero(i32 187) + call void @llvm.aarch64.sme.zero(i32 188) + call void @llvm.aarch64.sme.zero(i32 189) + call void @llvm.aarch64.sme.zero(i32 190) + call void @llvm.aarch64.sme.zero(i32 191) + call void @llvm.aarch64.sme.zero(i32 192) + call void @llvm.aarch64.sme.zero(i32 193) + 
call void @llvm.aarch64.sme.zero(i32 194) + call void @llvm.aarch64.sme.zero(i32 195) + call void @llvm.aarch64.sme.zero(i32 196) + call void @llvm.aarch64.sme.zero(i32 197) + call void @llvm.aarch64.sme.zero(i32 198) + call void @llvm.aarch64.sme.zero(i32 199) + call void @llvm.aarch64.sme.zero(i32 200) + call void @llvm.aarch64.sme.zero(i32 201) + call void @llvm.aarch64.sme.zero(i32 202) + call void @llvm.aarch64.sme.zero(i32 203) + call void @llvm.aarch64.sme.zero(i32 204) + call void @llvm.aarch64.sme.zero(i32 205) + call void @llvm.aarch64.sme.zero(i32 206) + call void @llvm.aarch64.sme.zero(i32 207) + call void @llvm.aarch64.sme.zero(i32 208) + call void @llvm.aarch64.sme.zero(i32 209) + call void @llvm.aarch64.sme.zero(i32 210) + call void @llvm.aarch64.sme.zero(i32 211) + call void @llvm.aarch64.sme.zero(i32 212) + call void @llvm.aarch64.sme.zero(i32 213) + call void @llvm.aarch64.sme.zero(i32 214) + call void @llvm.aarch64.sme.zero(i32 215) + call void @llvm.aarch64.sme.zero(i32 216) + call void @llvm.aarch64.sme.zero(i32 217) + call void @llvm.aarch64.sme.zero(i32 218) + call void @llvm.aarch64.sme.zero(i32 219) + call void @llvm.aarch64.sme.zero(i32 220) + call void @llvm.aarch64.sme.zero(i32 221) + call void @llvm.aarch64.sme.zero(i32 222) + call void @llvm.aarch64.sme.zero(i32 223) + call void @llvm.aarch64.sme.zero(i32 224) + call void @llvm.aarch64.sme.zero(i32 225) + call void @llvm.aarch64.sme.zero(i32 226) + call void @llvm.aarch64.sme.zero(i32 227) + call void @llvm.aarch64.sme.zero(i32 228) + call void @llvm.aarch64.sme.zero(i32 229) + call void @llvm.aarch64.sme.zero(i32 230) + call void @llvm.aarch64.sme.zero(i32 231) + call void @llvm.aarch64.sme.zero(i32 232) + call void @llvm.aarch64.sme.zero(i32 233) + call void @llvm.aarch64.sme.zero(i32 234) + call void @llvm.aarch64.sme.zero(i32 235) + call void @llvm.aarch64.sme.zero(i32 236) + call void @llvm.aarch64.sme.zero(i32 237) + call void @llvm.aarch64.sme.zero(i32 238) + call void @llvm.aarch64.sme.zero(i32 239) + call void @llvm.aarch64.sme.zero(i32 240) + call void @llvm.aarch64.sme.zero(i32 241) + call void @llvm.aarch64.sme.zero(i32 242) + call void @llvm.aarch64.sme.zero(i32 243) + call void @llvm.aarch64.sme.zero(i32 244) + call void @llvm.aarch64.sme.zero(i32 245) + call void @llvm.aarch64.sme.zero(i32 246) + call void @llvm.aarch64.sme.zero(i32 247) + call void @llvm.aarch64.sme.zero(i32 248) + call void @llvm.aarch64.sme.zero(i32 249) + call void @llvm.aarch64.sme.zero(i32 250) + call void @llvm.aarch64.sme.zero(i32 251) + call void @llvm.aarch64.sme.zero(i32 252) + call void @llvm.aarch64.sme.zero(i32 253) + call void @llvm.aarch64.sme.zero(i32 254) + call void @llvm.aarch64.sme.zero(i32 255) ret void } -declare void @llvm.aarch64.sme.zero(i64) +declare void @llvm.aarch64.sme.zero(i32)
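For reference, a minimal standalone IR sketch (not part of the patch) of how the updated intrinsics are called after this change; the function name @zero_then_store_row and its operand choices are illustrative only, and it assumes the tile-mask and tile-index operands must now be compile-time i32 immediates while slice indices may stay run-time values:

define void @zero_then_store_row(<vscale x 4 x i1> %pg, ptr %ptr, i32 %slice) {
  ; Zero all of ZA: the all-ones 8-bit tile mask, passed as an i32 immediate
  ; (printed as "zero {za}" in the checks above).
  call void @llvm.aarch64.sme.zero(i32 255)
  ; Store one horizontal slice of tile 0: the tile index (i32 0) is an
  ; immediate, while the slice index %slice remains a run-time i32 value.
  call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> %pg, ptr %ptr, i32 0, i32 %slice)
  ret void
}

declare void @llvm.aarch64.sme.zero(i32)
declare void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1>, ptr, i32, i32)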