Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2060,6 +2060,10 @@
   defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
   defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
   defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
+
+  defm ASR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_asr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSL_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsl, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
 } // End HasSVEorSME, UseExperimentalZeroingPseudos
 
 let Predicates = [HasSVEorSME] in {
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -567,6 +567,12 @@
 : Pat<(vt (op (pt (SVEAnyPredicate)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
       (inst $Rn, i32:$imm)>;
 
+class SVE_2_Op_Imm_Pat_Zero<ValueType vt, SDPatternOperator op, ValueType pt,
+                            ValueType it, ComplexPattern cpx, Instruction inst>
+: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Op1, (SVEDup0)),
+          (vt (splat_vector (it (cpx i32:$imm)))))),
+      (inst $Pg, $Op1, i32:$imm)>;
+
 class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
                           ValueType pt, ValueType it,
                           FPImmLeaf immL, int imm,
@@ -4851,6 +4857,20 @@
   def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, i32, tvecshiftR64, !cast<Pseudo>(NAME # _ZERO_D)>;
 }
 
+multiclass sve_int_bin_pred_imm_zeroing_bhsd<SDPatternOperator op, ComplexPattern imm_b,
+                                             ComplexPattern imm_h, ComplexPattern imm_s,
+                                             ComplexPattern imm_d> {
+  def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesZero>;
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesZero>;
+
+  def : SVE_2_Op_Imm_Pat_Zero<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Pseudo>(NAME # _ZERO_B)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Pseudo>(NAME # _ZERO_H)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Pseudo>(NAME # _ZERO_S)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv2i64, op, nxv2i1, i32, imm_d, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
 multiclass sve_int_bin_pred_shift_wide<bits<2> opc, string asm,
                                        SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
+
+;; ASR
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: asr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: asr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: asr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: asr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSL
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 7, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 15, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 31, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 63, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSR
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)