Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -495,6 +495,10 @@ // so the Destructive Operand must be unique. bool DOPRegIsUnique = false; switch (DType) { + case AArch64::DestructiveBinary: + // Don't check the SrcIdx for DOPRegIsUnique to avoid the crash + DOPRegIsUnique = DstReg == MI.getOperand(DOPIdx).getReg(); + break; case AArch64::DestructiveBinaryComm: case AArch64::DestructiveBinaryCommWithRev: DOPRegIsUnique = @@ -527,23 +531,27 @@ // Get the right MOVPRFX uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode); - unsigned MovPrfx, MovPrfxZero; + unsigned MovPrfx, AddZero, MovPrfxZero; switch (ElementSize) { case AArch64::ElementSizeNone: case AArch64::ElementSizeB: MovPrfx = AArch64::MOVPRFX_ZZ; + AddZero = AArch64::ADD_ZI_B; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B; break; case AArch64::ElementSizeH: MovPrfx = AArch64::MOVPRFX_ZZ; + AddZero = AArch64::ADD_ZI_H; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H; break; case AArch64::ElementSizeS: MovPrfx = AArch64::MOVPRFX_ZZ; + AddZero = AArch64::ADD_ZI_S; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S; break; case AArch64::ElementSizeD: MovPrfx = AArch64::MOVPRFX_ZZ; + AddZero = AArch64::ADD_ZI_D; MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D; break; default: @@ -553,7 +561,7 @@ // // Create the destructive operation (if required) // - MachineInstrBuilder PRFX, DOP; + MachineInstrBuilder PRFX, DOP, ADD; if (FalseZero) { #ifndef NDEBUG assert(DOPRegIsUnique && "The destructive operand should be unique"); @@ -579,6 +587,18 @@ DOPIdx = 0; } + // Create the additional ADD to zero the lanes when the DstReg is not unique. 
+ // Zeros the lanes in z0 that aren't active in p0 with sequence movprfx + // z0.b, p0/z, z0.b; add z0.b, z0.b, #0; + if (DType == AArch64::DestructiveBinary && + DstReg == MI.getOperand(SrcIdx).getReg()) { + ADD = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AddZero)) + .addReg(DstReg, RegState::Define) + .addReg(DstReg) + .addImm(0) + .addImm(0); + } + // // Create the destructive operation // @@ -591,6 +611,7 @@ .add(MI.getOperand(PredIdx)) .add(MI.getOperand(SrcIdx)); break; + case AArch64::DestructiveBinary: case AArch64::DestructiveBinaryImm: case AArch64::DestructiveBinaryComm: case AArch64::DestructiveBinaryCommWithRev: Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -433,7 +433,7 @@ defm ORR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm EOR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; defm AND_ZPZZ : sve_int_bin_pred_zeroing_bhsd; - defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd; + defm BIC_ZPZZ : sve_int_bin_pred_zeroing_bhsd; } // End HasSVEorSME, UseExperimentalZeroingPseudos let Predicates = [HasSVEorSME] in { Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll +++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll @@ -344,8 +344,7 @@ define @bic_i8_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i8_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.b, #0 // =0x0 -; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b +; CHECK-NEXT: movprfx z0.b, p0/z, z0.b ; CHECK-NEXT: bic z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -358,8 +357,7 @@ define @bic_i16_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i16_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.h, #0 // =0x0 -; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h +; CHECK-NEXT: movprfx 
z0.h, p0/z, z0.h ; CHECK-NEXT: bic z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -372,8 +370,7 @@ define @bic_i32_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i32_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.s, #0 // =0x0 -; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s ; CHECK-NEXT: bic z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -386,8 +383,7 @@ define @bic_i64_zero( %pg, %a, %b) { ; CHECK-LABEL: bic_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d ; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer @@ -397,6 +393,39 @@ ret %out } +; BIC (i.e. A & ~A) is an illegal operation with movprfx, so the codegen depends on the IR before expand-pseudo +define @bic_i64_zero_no_unique_reg( %pg, %a) { +; CHECK-LABEL: bic_i64_zero_no_unique_reg: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, z0.d +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: bic z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, + %a_z, + %a_z) + ret %out +} + +; BIC (i.e. A & ~B) is not a commutative operation, so disable it when the +; destination operand is not the destructive operand +define @bic_i64_zero_no_comm( %pg, %a, %b) { +; CHECK-LABEL: bic_i64_zero_no_comm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.d, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d +; CHECK-NEXT: bic z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, + %b, + %a_z) + ret %out +} + declare @llvm.aarch64.sve.add.nxv16i8(, , ) declare @llvm.aarch64.sve.add.nxv8i16(, , ) declare @llvm.aarch64.sve.add.nxv4i32(, , )