Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -345,13 +345,18 @@
   defm ADD_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
   defm SUB_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
   defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
+
+  defm ORR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_orr>;
+  defm EOR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_eor>;
+  defm AND_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_and>;
+  defm BIC_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_bic>;
 } // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
 
 let Predicates = [HasSVEorStreamingSVE] in {
-  defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
-  defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
-  defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
-  defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", int_aarch64_sve_bic>;
+  defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", int_aarch64_sve_orr, DestructiveBinaryComm>;
+  defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", int_aarch64_sve_eor, DestructiveBinaryComm>;
+  defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", "AND_ZPZZ", int_aarch64_sve_and, DestructiveBinaryComm>;
+  defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", "BIC_ZPZZ", int_aarch64_sve_bic, DestructiveBinaryComm>;
 
   defm ADD_ZI   : sve_int_arith_imm0<0b000, "add", add>;
   defm SUB_ZI   : sve_int_arith_imm0<0b001, "sub", sub>;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2767,11 +2767,19 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_int_bin_pred_log<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>;
-  def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>;
-  def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>;
-  def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_log<bits<3> opc, string asm, string Ps,
+                                SDPatternOperator op,
+                                DestructiveInstTypeEnum flags> {
+  let DestructiveInstType = flags in {
+  def _B : sve_int_bin_pred_arit_log<0b00, 0b11, opc, asm, ZPR8>,
+           SVEPseudo2Instr<Ps # _B, 1>;
+  def _H : sve_int_bin_pred_arit_log<0b01, 0b11, opc, asm, ZPR16>,
+           SVEPseudo2Instr<Ps # _H, 1>;
+  def _S : sve_int_bin_pred_arit_log<0b10, 0b11, opc, asm, ZPR32>,
+           SVEPseudo2Instr<Ps # _S, 1>;
+  def _D : sve_int_bin_pred_arit_log<0b11, 0b11, opc, asm, ZPR64>,
+           SVEPseudo2Instr<Ps # _D, 1>;
+  }
 
   def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1,  nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
 
 ;
@@ -6,9 +7,10 @@
 
 define <vscale x 16 x i8> @add_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: add_i8_zero:
-; CHECK: movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    add z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a_z,
@@ -18,9 +20,10 @@
 
 define <vscale x 8 x i16> @add_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: add_i16_zero:
-; CHECK: movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    add z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a_z,
@@ -30,9 +33,10 @@
 
 define <vscale x 4 x i32> @add_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: add_i32_zero:
-; CHECK: movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    add z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a_z,
@@ -42,9 +46,10 @@
 
 define <vscale x 2 x i64> @add_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: add_i64_zero:
-; CHECK: movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a_z,
@@ -58,9 +63,10 @@
 
 define <vscale x 16 x i8> @sub_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: sub_i8_zero:
-; CHECK: movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    sub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a_z,
@@ -70,9 +76,10 @@
 
 define <vscale x 8 x i16> @sub_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: sub_i16_zero:
-; CHECK: movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    sub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a_z,
@@ -82,9 +89,10 @@
 
 define <vscale x 4 x i32> @sub_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: sub_i32_zero:
-; CHECK: movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    sub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a_z,
@@ -94,9 +102,10 @@
 
 define <vscale x 2 x i64> @sub_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sub_i64_zero:
-; CHECK: movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    sub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a_z,
@@ -110,9 +119,10 @@
 
 define <vscale x 16 x i8> @subr_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: subr_i8_zero:
-; CHECK: movprfx z0.b, p0/z, z0.b
-; CHECK-NEXT: subr z0.b, p0/m, z0.b, z1.b
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    subr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
   %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a_z,
@@ -122,9 +132,10 @@
 
 define <vscale x 8 x i16> @subr_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: subr_i16_zero:
-; CHECK: movprfx z0.h, p0/z, z0.h
-; CHECK-NEXT: subr z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    subr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
   %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a_z,
@@ -134,9 +145,10 @@
 
 define <vscale x 4 x i32> @subr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: subr_i32_zero:
-; CHECK: movprfx z0.s, p0/z, z0.s
-; CHECK-NEXT: subr z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    subr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
   %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a_z,
@@ -146,9 +158,10 @@
 
 define <vscale x 2 x i64> @subr_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: subr_i64_zero:
-; CHECK: movprfx z0.d, p0/z, z0.d
-; CHECK-NEXT: subr z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    subr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
   %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a_z,
@@ -156,6 +169,230 @@
   ret <vscale x 2 x i64> %out
 }
 
+;
+; ORR
+;
+
+define <vscale x 16 x i8> @orr_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: orr_i8_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    orr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a_z,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @orr_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: orr_i16_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    orr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a_z,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @orr_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: orr_i32_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    orr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a_z,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @orr_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: orr_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    orr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; EOR
+;
+
+define <vscale x 16 x i8> @eor_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: eor_i8_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    eor z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a_z,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @eor_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: eor_i16_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    eor z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a_z,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @eor_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: eor_i32_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    eor z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a_z,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @eor_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: eor_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    eor z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; AND
+;
+
+define <vscale x 16 x i8> @and_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: and_i8_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    and z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a_z,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @and_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: and_i16_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    and z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a_z,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @and_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: and_i32_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    and z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a_z,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @and_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: and_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    and z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; BIC
+;
+
+define <vscale x 16 x i8> @bic_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: bic_i8_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    bic z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %pg,
+                                                               <vscale x 16 x i8> %a_z,
+                                                               <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: bic_i16_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    bic z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg,
+                                                               <vscale x 8 x i16> %a_z,
+                                                               <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @bic_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: bic_i32_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    bic z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %pg,
+                                                               <vscale x 4 x i32> %a_z,
+                                                               <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @bic_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: bic_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    bic z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -170,3 +407,23 @@
 declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)