Index: llvm/lib/Target/AArch64/AArch64.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -103,6 +103,24 @@
 def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
   "Enable Scalable Vector Extension (SVE) instructions", [FeatureFullFP16]>;
 
+// This flag is currently still labeled as Experimental, but when fully
+// implemented this should tell the compiler to use the zeroing pseudos to
+// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
+// lanes are known to be zero. The pseudos will then be expanded using the
+// MOVPRFX instruction to zero the inactive lanes. This feature should only be
+// enabled if MOVPRFX instructions are known to merge with the destructive
+// operations they prefix.
+//
+// This feature could similarly be extended to support cheap merging of _any_
+// value into the inactive lanes using the MOVPRFX instruction that uses
+// merging-predication.
+def FeatureExperimentalZeroingPseudos
+    : SubtargetFeature<"use-experimental-zeroing-pseudos",
+                       "UseExperimentalZeroingPseudos", "true",
+                       "Hint to the compiler that the MOVPRFX instruction is "
+                       "merged with destructive operations",
+                       []>;
+
 def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
   "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
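To make the effect of the new feature concrete: when the inactive lanes of a predicated operation's first operand are explicitly zeroed, the zeroing pseudo lets the backend fold that zeroing into a MOVPRFX prefix instead of what is typically a separate mov/sel sequence. The sketch below mirrors the tests updated later in this patch; the function name and exact CHECK lines are illustrative assumptions, not text taken from the patch.

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+use-experimental-zeroing-pseudos < %s | FileCheck %s

define <vscale x 16 x i8> @add_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: add_i8_zero:
; CHECK: movprfx z0.b, p0/z, z0.b
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  ; Zero the inactive lanes of %a, then do a merging predicated add.
  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a_z,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)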
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -155,6 +155,8 @@
 def IsLE : Predicate<"Subtarget->isLittleEndian()">;
 def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
 def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
+def UseExperimentalZeroingPseudos
+    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
 def UseAlternateSExtLoadCVTF32
     : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -226,9 +226,11 @@
   defm ADD_ZPZZ  : sve_int_bin_pred_bhsd<AArch64add_p>;
 
-  defm ADD_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_add>;
-  defm SUB_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_sub>;
-  defm SUBR_ZPZZ : sve_int_bin_pred_zx<int_aarch64_sve_subr>;
+  let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+    defm ADD_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
+    defm SUB_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
+    defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
+  }
 
   defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
   defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
@@ -354,18 +356,20 @@
   defm FADD_ZPZZ   : sve_fp_bin_pred_hfd<AArch64fadd_p>;
 
-  defm FADD_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fadd>;
-  defm FSUB_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsub>;
-  defm FMUL_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmul>;
-  defm FSUBR_ZPZZ  : sve_fp_2op_p_zds_zx<int_aarch64_sve_fsubr>;
-  defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmaxnm>;
-  defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zx<int_aarch64_sve_fminnm>;
-  defm FMAX_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmax>;
-  defm FMIN_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmin>;
-  defm FABD_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fabd>;
-  defm FMULX_ZPZZ  : sve_fp_2op_p_zds_zx<int_aarch64_sve_fmulx>;
-  defm FDIVR_ZPZZ  : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdivr>;
-  defm FDIV_ZPZZ   : sve_fp_2op_p_zds_zx<int_aarch64_sve_fdiv>;
+  let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+    defm FADD_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
+    defm FSUB_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
+    defm FMUL_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
+    defm FSUBR_ZPZZ  : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
+    defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
+    defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
+    defm FMAX_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
+    defm FMIN_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
+    defm FABD_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
+    defm FMULX_ZPZZ  : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
+    defm FDIVR_ZPZZ  : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
+    defm FDIV_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
+  }
 
   defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
   defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
@@ -1260,10 +1264,12 @@
   defm LSL_ZPmI  : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
   defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
 
-  defm ASR_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_asr>;
-  defm LSR_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_lsr>;
-  defm LSL_ZPZZ  : sve_int_bin_pred_zx<int_aarch64_sve_lsl>;
-  defm ASRD_ZPZI : sve_int_bin_pred_shift_0_right_zx<int_aarch64_sve_asrd>;
+  let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+    defm ASR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
+    defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
+    defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
+    defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
+  }
 
   defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ", 1>;
   defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ", 1>;
@@ -2274,6 +2280,14 @@
   defm URSHR_ZPmI  : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>;
   defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>;
 
+  let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
+    defm SQSHL_ZPZI  : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshl>;
+    defm UQSHL_ZPZI  : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_uqshl>;
+    defm SRSHR_ZPZI  : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
+    defm URSHR_ZPZI  : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
+    defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
+  }
+
   // SVE2 integer add/subtract long
   defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>;
   defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt", int_aarch64_sve_saddlt>;
Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -104,6 +104,10 @@
   bool HasPAN_RWV = false;
   bool HasCCPP = false;
 
+  // SVE extensions
+  bool HasSVE = false;
+  bool UseExperimentalZeroingPseudos = false;
+
   // Armv8.2 Crypto extensions
   bool HasSM4 = false;
   bool HasSHA3 = false;
@@ -130,8 +134,6 @@
   bool HasRCPC_IMMO = false;
 
   bool HasLSLFast = false;
-  bool HasSVE = false;
-  bool HasSVE2 = false;
   bool HasRCPC = false;
   bool HasAggressiveFMA = false;
@@ -158,6 +160,7 @@
   bool HasEnhancedCounterVirtualization = false;
 
   // Arm SVE2 extensions
+  bool HasSVE2 = false;
   bool HasSVE2AES = false;
   bool HasSVE2SM4 = false;
   bool HasSVE2SHA3 = false;
@@ -398,6 +401,10 @@
 
   unsigned getWideningBaseCost() const { return WideningBaseCost; }
 
+  bool useExperimentalZeroingPseudos() const {
+    return UseExperimentalZeroingPseudos;
+  }
+
   /// CPU has TBI (top byte of addresses is ignored during HW address
   /// translation) and OS enables it.
   bool supportsAddressTopByteIgnored() const;
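The useExperimentalZeroingPseudos() accessor introduced above is what the UseExperimentalZeroingPseudos predicate in AArch64InstrInfo.td queries during instruction selection. The reverse forms called out in the feature comment (e.g. SUBR) benefit in the same way: the zeroed operand is prefixed with MOVPRFX and the reversed destructive form is applied on top of it. A hedged sketch, with assumed register assignment and CHECK lines:

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+use-experimental-zeroing-pseudos < %s | FileCheck %s

define <vscale x 4 x i32> @subr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: subr_i32_zero:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: subr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  ; Inactive lanes of %a are zeroed; active lanes compute %b - %a.
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a_z,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)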
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1596,7 +1596,7 @@
   def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_fp_2op_p_zds_zx<SDPatternOperator op> {
+multiclass sve_fp_2op_p_zds_zeroing_hsd<SDPatternOperator op> {
   def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
   def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
   def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
@@ -4764,27 +4764,24 @@
     let Inst{9-8} = imm{4-3};
   }
 
-  def _B_Z_UNDEF : PredTwoOpImmPseudo<NAME # _B, ZPR8,  tvecshiftL8,  FalseLanesUndef>;
-  def _H_Z_UNDEF : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesUndef>;
-  def _S_Z_UNDEF : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesUndef>;
-  def _D_Z_UNDEF : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesUndef>;
-
-  def _B_Z_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8,  tvecshiftL8,  FalseLanesZero>;
-  def _H_Z_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
-  def _S_Z_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
-  def _D_Z_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
-
-  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8,  !cast<Pseudo>(NAME # _B_Z_ZERO)>;
-  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1,  nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_Z_ZERO)>;
-  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1,  nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_Z_ZERO)>;
-  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1,  nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_Z_ZERO)>;
-
   def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8,  !cast<Instruction>(NAME # _B)>;
   def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1,  nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
   def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1,  nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1,  nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
 }
 
+multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
+  def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  tvecshiftL8,  FalseLanesZero>;
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
+
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8,  !cast<Pseudo>(NAME # _ZERO_B)>;
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1,  nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _ZERO_H)>;
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1,  nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _ZERO_S)>;
+  def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1,  nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
 multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
                                             SDPatternOperator op = null_frag> {
   def _B : SVEPseudo2Instr<NAME # _B, 1>,
@@ -4809,7 +4806,7 @@
   def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_bin_pred_shift_0_right_zx<SDPatternOperator op> {
+multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op> {
   def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  tvecshiftR8,  FalseLanesZero>;
   def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftR16, FalseLanesZero>;
   def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftR32, FalseLanesZero>;
@@ -4863,7 +4860,7 @@
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_bin_pred_zx<SDPatternOperator op> {
+multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
   def _ZERO_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesZero>;
   def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
   def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith-merging.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; WARN-NOT: warning
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; WARN-NOT: warning
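The shift test updated next exercises the immediate-form pseudos such as ASRD_ZPZI gated above. A representative case, with an assumed immediate and CHECK lines:

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+use-experimental-zeroing-pseudos < %s | FileCheck %s

define <vscale x 8 x i16> @asrd_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asrd_i16_zero:
; CHECK: movprfx z0.h, p0/z, z0.h
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #2
; CHECK-NEXT: ret
  ; Arithmetic shift right for divide by an immediate, inactive lanes zeroed.
  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a_z,
                                                                i32 2)
  ret <vscale x 8 x i16> %out
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)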
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-shifts-merging.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; WARN-NOT: warning
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
 
 ;
 ; SQSHLU
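Only the RUN line of the SVE2 test is shown here; a representative SQSHLU zeroing case in this file's style (the function name, immediate, and CHECK lines are assumptions) would be:

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s

define <vscale x 4 x i32> @sqshlu_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshlu_i32_zero:
; CHECK: movprfx z0.s, p0/z, z0.s
; CHECK-NEXT: sqshlu z0.s, p0/m, z0.s, #3
; CHECK-NEXT: ret
  ; Signed saturating shift left unsigned by an immediate, inactive lanes zeroed.
  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a_z,
                                                                  i32 3)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)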