Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -451,9 +451,6 @@
   Register DstReg = MI.getOperand(0).getReg();
   bool DstIsDead = MI.getOperand(0).isDead();
 
-  if (DType == AArch64::DestructiveBinary)
-    assert(DstReg != MI.getOperand(3).getReg());
-
   bool UseRev = false;
   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
   switch (DType) {
@@ -495,6 +492,11 @@
   // so the Destructive Operand must be unique.
   bool DOPRegIsUnique = false;
   switch (DType) {
+  case AArch64::DestructiveBinary:
+    // Don't check the SrcIdx for DOPRegIsUnique to avoid the crash; this is
+    // addressed by an additional LSL when necessary.
+    DOPRegIsUnique = DstReg == MI.getOperand(DOPIdx).getReg();
+    break;
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
     DOPRegIsUnique =
@@ -527,23 +529,27 @@
 
   // Get the right MOVPRFX
   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
-  unsigned MovPrfx, MovPrfxZero;
+  unsigned MovPrfx, LSLZero, MovPrfxZero;
   switch (ElementSize) {
   case AArch64::ElementSizeNone:
   case AArch64::ElementSizeB:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    LSLZero = AArch64::LSL_ZPmI_B;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
     break;
   case AArch64::ElementSizeH:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    LSLZero = AArch64::LSL_ZPmI_H;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
     break;
   case AArch64::ElementSizeS:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    LSLZero = AArch64::LSL_ZPmI_S;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
     break;
   case AArch64::ElementSizeD:
     MovPrfx = AArch64::MOVPRFX_ZZ;
+    LSLZero = AArch64::LSL_ZPmI_D;
     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
     break;
   default:
@@ -579,6 +585,18 @@
     DOPIdx = 0;
   }
 
+  // Create an additional LSL to zero the lanes when the DstReg is not unique.
+  // This zeroes the lanes of z0 that are inactive in p0, using the sequence:
+  //   movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0
+  if (DType == AArch64::DestructiveBinary &&
+      DstReg == MI.getOperand(SrcIdx).getReg()) {
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
+        .addReg(DstReg, RegState::Define)
+        .add(MI.getOperand(PredIdx))
+        .addReg(DstReg)
+        .addImm(0);
+  }
+
   //
   // Create the destructive operation
   //
@@ -591,6 +609,7 @@
         .add(MI.getOperand(PredIdx))
         .add(MI.getOperand(SrcIdx));
     break;
+  case AArch64::DestructiveBinary:
   case AArch64::DestructiveBinaryImm:
   case AArch64::DestructiveBinaryComm:
   case AArch64::DestructiveBinaryCommWithRev:
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -433,7 +433,7 @@
   defm ORR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_orr>;
   defm EOR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_eor>;
   defm AND_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_and>;
-  defm BIC_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<null_frag>;
+  defm BIC_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_bic>;
 } // End HasSVEorSME, UseExperimentalZeroingPseudos
 
 let Predicates = [HasSVEorSME] in {
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll
@@ -344,8 +344,7 @@
 define <vscale x 16 x i8> @bic_i8_zero(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: bic_i8_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.b, #0 // =0x0
-; CHECK-NEXT:    sel z0.b, p0, z0.b, z2.b
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
 ; CHECK-NEXT:    bic z0.b, p0/m, z0.b, z1.b
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
@@ -358,8 +357,7 @@
 define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: bic_i16_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.h, #0 // =0x0
-; CHECK-NEXT:    sel z0.h, p0, z0.h, z2.h
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
 ; CHECK-NEXT:    bic z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
@@ -372,8 +370,7 @@
 define <vscale x 4 x i32> @bic_i32_zero(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: bic_i32_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.s, #0 // =0x0
-; CHECK-NEXT:    sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
 ; CHECK-NEXT:    bic z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
@@ -386,8 +383,7 @@
 define <vscale x 2 x i64> @bic_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: bic_i64_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z2.d, #0 // =0x0
-; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
 ; CHECK-NEXT:    bic z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
@@ -397,6 +393,39 @@
   ret <vscale x 2 x i64> %out
 }
 
+; BIC (i.e. A & ~A) is an illegal operation with movprfx, so the codegen depends on the IR before expand-pseudo.
+define <vscale x 2 x i64> @bic_i64_zero_no_unique_reg(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: bic_i64_zero_no_unique_reg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #0 // =0x0
+; CHECK-NEXT:    mov z1.d, p0/m, z0.d
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    bic z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %a_z,
+                                                               <vscale x 2 x i64> %a_z)
+  ret <vscale x 2 x i64> %out
+}
+
+; BIC (i.e. A & ~B) is not a commutative operation, so disable it when the
+; destination operand is not the destructive operand.
+define <vscale x 2 x i64> @bic_i64_zero_no_comm(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: bic_i64_zero_no_comm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.d, #0 // =0x0
+; CHECK-NEXT:    sel z0.d, p0, z0.d, z2.d
+; CHECK-NEXT:    bic z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %pg,
+                                                               <vscale x 2 x i64> %b,
+                                                               <vscale x 2 x i64> %a_z)
+  ret <vscale x 2 x i64> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.mir
@@ -0,0 +1,39 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+use-experimental-zeroing-pseudos -run-pass=aarch64-expand-pseudo %s -o - | FileCheck %s
+
+# Should create an additional LSL to zero the lanes, as the DstReg is not unique.
+
+--- |
+  define <vscale x 8 x i16> @bic_i16_zero(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+    %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+    %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a_z, <vscale x 8 x i16> %a_z)
+    ret <vscale x 8 x i16> %out
+  }
+
+  declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+...
+---
+name: bic_i16_zero
+alignment: 4
+tracksRegLiveness: true
+tracksDebugUserValues: true
+registers: []
+liveins:
+  - { reg: '$p0', virtual-reg: '' }
+  - { reg: '$z0', virtual-reg: '' }
+body: |
+  bb.0 (%ir-block.0):
+    liveins: $p0, $z0
+
+    ; CHECK-LABEL: name: bic_i16_zero
+    ; CHECK: liveins: $p0, $z0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: BUNDLE implicit-def $z0, implicit-def $q0, implicit-def $d0, implicit-def $s0, implicit-def $h0, implicit-def $b0, implicit-def $z0_hi, implicit killed $p0, implicit $z0 {
+    ; CHECK-NEXT:   $z0 = MOVPRFX_ZPzZ_H $p0, $z0
+    ; CHECK-NEXT:   $z0 = LSL_ZPmI_H killed renamable $p0, internal $z0, 0
+    ; CHECK-NEXT:   $z0 = BIC_ZPmZ_H killed renamable $p0, internal killed $z0, internal killed renamable $z0
+    ; CHECK-NEXT: }
+    ; CHECK-NEXT: RET undef $lr, implicit $z0
+    renamable $z0 = BIC_ZPZZ_ZERO_H killed renamable $p0, killed renamable $z0, killed renamable $z0
+    RET_ReallyLR implicit $z0
+...
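
Reviewer note: a minimal sketch of the expansion this patch enables for the aliased-operand case, using the registers from the MIR test above. The first sequence is what a naive MOVPRFX pairing would produce (it is what tripped the assertion removed from AArch64ExpandPseudoInsts.cpp); the second matches the BUNDLE in the CHECK lines. This is illustrative commentary, not generated output:

    // Pseudo before expansion; both source operands alias the destination:
    //   z0 = BIC_ZPZZ_ZERO_H p0, z0, z0
    //
    // Illegal expansion: MOVPRFX must not be paired with an instruction that
    // reads its destination register in a non-destructive operand position:
    //   movprfx z0.h, p0/z, z0.h
    //   bic     z0.h, p0/m, z0.h, z0.h  // z0 also appears as the second source
    //
    // Expansion with this patch; the inserted LSL is the legal MOVPRFX
    // consumer, and the BIC that follows is an ordinary destructive op:
    movprfx z0.h, p0/z, z0.h       // zero the lanes of z0 inactive in p0
    lsl     z0.h, p0/m, z0.h, #0   // shift by zero: active lanes unchanged
    bic     z0.h, p0/m, z0.h, z0.h // z0 & ~z0, i.e. zero in the active lanes

LSL #0 is a reasonable filler here because a predicated shift by zero is a no-op on the active lanes, so the only observable effect of the prefix sequence is the zeroing done by MOVPRFX.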