Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -495,6 +495,11 @@
   // so the Destructive Operand must be unique.
   bool DOPRegIsUnique = false;
   switch (DType) {
+  case AArch64::DestructiveBinary:
+    // Zero the lanes in z0 that aren't active in p0 with the sequence:
+    // movprfx z0.b, p0/z, z0.b; add z0.b, z0.b, #0
+    DOPRegIsUnique = true;
+    break;
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
     DOPRegIsUnique =
@@ -527,23 +532,27 @@

   // Get the right MOVPRFX
   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
-  unsigned MovPrfx, MovPrfxZero;
+  unsigned MovPrfx, AddZero, MovPrfxZero;
   switch (ElementSize) {
   case AArch64::ElementSizeNone:
   case AArch64::ElementSizeB:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    AddZero = AArch64::ADD_ZI_B;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
     break;
   case AArch64::ElementSizeH:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    AddZero = AArch64::ADD_ZI_H;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
     break;
   case AArch64::ElementSizeS:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    AddZero = AArch64::ADD_ZI_S;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
     break;
   case AArch64::ElementSizeD:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    AddZero = AArch64::ADD_ZI_D;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
     break;
   default:
@@ -553,7 +562,7 @@
   //
   // Create the destructive operation (if required)
   //
-  MachineInstrBuilder PRFX, DOP;
+  MachineInstrBuilder PRFX, DOP, ADD;
   if (FalseZero) {
 #ifndef NDEBUG
     assert(DOPRegIsUnique && "The destructive operand should be unique");
@@ -579,6 +588,14 @@
     DOPIdx = 0;
   }

+  // Create the additional ADD to zero the lanes
+  if (DType == AArch64::DestructiveBinary)
+    ADD = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AddZero))
+              .addReg(DstReg, RegState::Define)
+              .addReg(DstReg)
+              .addImm(0)
+              .addImm(0);
+
   //
   // Create the destructive operation
   //
@@ -591,6 +608,7 @@
           .add(MI.getOperand(PredIdx))
           .add(MI.getOperand(SrcIdx));
     break;
+  case AArch64::DestructiveBinary:
   case AArch64::DestructiveBinaryImm:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -433,7 +433,7 @@
   defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd;
   defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd;
   defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd;
-  defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd;
+  defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd;
 } // End HasSVEorSME, UseExperimentalZeroingPseudos

 let Predicates = [HasSVEorSME] in {
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
@@ -344,8 +344,8 @@
 define <vscale x 16 x i8> @bic_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: bic_i8_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.b, #0 // =0x0
-; CHECK-NEXT:    sel z0.b, p0, z0.b, z2.b
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    add z0.b, z0.b, #0 // =0x0
 ; CHECK-NEXT:    bic z0.b, p0/m, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
@@ -358,8 +358,8 @@
 define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: bic_i16_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.h, #0 // =0x0
-; CHECK-NEXT:    sel z0.h, p0, z0.h, z2.h
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    add z0.h, z0.h, #0 // =0x0
 ; CHECK-NEXT:    bic z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
@@ -372,8 +372,8 @@
 define <vscale x 4 x i32> @bic_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: bic_i32_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.s, #0 // =0x0
-; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    add z0.s, z0.s, #0 // =0x0
 ; CHECK-NEXT:    bic z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
@@ -386,8 +386,8 @@
 define <vscale x 2 x i64> @bic_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: bic_i64_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, #0 // =0x0
-; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    add z0.d, z0.d, #0 // =0x0
 ; CHECK-NEXT:    bic z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
@@ -397,6 +397,23 @@
   ret <vscale x 2 x i64> %out
 }

+; BIC (i.e. A & ~B) is not a commutative operation, so disable it when the
+; destination operand is not the destructive operand
+define <vscale x 2 x i64> @bic_i64_zero_no_comm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: bic_i64_zero_no_comm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, #0 // =0x0
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT:    bic z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b,
+                                                               <vscale x 2 x i64> %a_z)
+  ret <vscale x 2 x i64> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
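
Note for reviewers: the sketch below restates the before/after lowering of the zeroing BIC pseudo so the role of the extra ADD is easy to see. It is illustrative only, reusing the registers from the bic_i8_zero test above; it is not additional test output and not part of the patch.

  // Pseudo being expanded: z0 = bic(p0, select(p0, z0, 0), z1)

  // Old lowering: materialize zero, then select, which needs a scratch
  // register (z2) and a full-width constant move.
  mov     z2.b, #0                  // z2 = 0
  sel     z0.b, p0, z0.b, z2.b      // z0 = p0 ? z0 : 0
  bic     z0.b, p0/m, z0.b, z1.b    // active lanes: z0 & ~z1

  // New lowering: zero the inactive lanes in place. MOVPRFX must be
  // immediately followed by the destructive instruction it prefixes, and a
  // non-commutative DestructiveBinary op such as BIC cannot always be that
  // instruction: its second source may alias the prefix destination, and
  // unlike the Comm variants its operands cannot be swapped. The ADD of #0
  // is arithmetically a no-op whose only job is to consume the prefix.
  movprfx z0.b, p0/z, z0.b          // inactive lanes of z0 := 0
  add     z0.b, z0.b, #0            // completes the MOVPRFX pair
  bic     z0.b, p0/m, z0.b, z1.b    // active lanes: z0 & ~z1

The same constraint explains the new bic_i64_zero_no_comm test: when the operand that must be zeroed is not BIC's destructive operand, the zeroing pseudo cannot be formed (the operands cannot be swapped), so the generic mov/sel sequence remains.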