Index: llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -64,6 +64,8 @@
 private:
   bool expandMBB(MachineBasicBlock &MBB);
+  bool foldUnary(MachineInstr &MI, MachineBasicBlock &MBB,
+                 MachineBasicBlock::iterator MBBI);
   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
@@ -628,6 +630,110 @@
   return true;
 }
 
+/// \brief Returns the defining instruction for MachineOperand MO,
+/// which should be a register. If we are searching for the defining
+/// instruction for the purpose of removing it, it only returns
+/// the defining instruction if it is not read between Def..*MBBI.
+static bool getRegisterDefInstr(MachineBasicBlock::iterator MBBI,
+                                const MachineOperand &MO,
+                                MachineBasicBlock::iterator &Def,
+                                bool ForRemoving = false) {
+  assert(MO.isReg() && "Operand must be a register");
+  unsigned Reg = MO.getReg();
+
+  if (MBBI == MBBI->getParent()->begin())
+    return false;
+
+  MachineBasicBlock::iterator RI = MBBI;
+  for (--RI; RI != MBBI->getParent()->begin(); --RI) {
+    // If we want to remove the Def, it cannot be *used* anywhere
+    // else in between the Def and MBBI
+    if (ForRemoving && !RI->definesRegister(Reg) && RI->readsRegister(Reg))
+      return false;
+    else if (RI->definesRegister(Reg))
+      break;
+  }
+
+  if (!RI->definesRegister(Reg))
+    return false;
+
+  Def = RI;
+  return true;
+}
+
+/// \brief Replace instructions where the destructive operand is
+/// a vector of zeros with a bundled MOVPRFX instruction, e.g.
+/// Transform:
+///   %X0 = DUP_ZI_S 0, 0
+///   %X0 = FNEG_ZPmZ_S X0, P0, X2
+/// into:
+///   X0 = MOVPRFX P0/z, X0
+///   X0 = FNEG_ZPmZ_S X0, P0, X2
+bool AArch64ExpandPseudo::foldUnary(MachineInstr &MI, MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI) {
+  // Zd != Zn
+  if (MI.getOperand(0).getReg() == MI.getOperand(3).getReg())
+    return false;
+
+  // Zsd must be a DUP_ZI_(B|H|S|D) 0, 0
+  MachineBasicBlock::iterator Def;
+  if (!getRegisterDefInstr(MBBI, MI.getOperand(1), Def, true))
+    return false;
+
+  switch (Def->getOpcode()) {
+  case AArch64::DUP_ZI_B:
+  case AArch64::DUP_ZI_H:
+  case AArch64::DUP_ZI_S:
+  case AArch64::DUP_ZI_D:
+    break;
+  default:
+    return false;
+  }
+
+  if (!Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 0)
+    return false;
+
+  unsigned MovPrfx;
+  switch (TII->getElementSizeForOpcode(MI.getOpcode())) {
+  case AArch64::ElementSizeNone:
+  case AArch64::ElementSizeB:
+    MovPrfx = AArch64::MOVPRFX_ZPzZ_B;
+    break;
+  case AArch64::ElementSizeH:
+    MovPrfx = AArch64::MOVPRFX_ZPzZ_H;
+    break;
+  case AArch64::ElementSizeS:
+    MovPrfx = AArch64::MOVPRFX_ZPzZ_S;
+    break;
+  case AArch64::ElementSizeD:
+    MovPrfx = AArch64::MOVPRFX_ZPzZ_D;
+    break;
+  default:
+    llvm_unreachable("Unsupported ElementSize");
+  }
+
+  // Create a Zeroing MOVPRFX
+  MachineInstrBuilder PRFX, NewMI;
+  PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
+             .addReg(MI.getOperand(0).getReg(), RegState::Define)
+             .addReg(MI.getOperand(2).getReg())
+             .addReg(MI.getOperand(1).getReg(), RegState::Undef);
+
+  NewMI = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MI.getOpcode()))
+              .add(MI.getOperand(0))
+              .addReg(MI.getOperand(1).getReg(), RegState::Kill)
+              .add(MI.getOperand(2))
+              .add(MI.getOperand(3));
+
+  finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
+  transferImpOps(MI, PRFX, NewMI);
+
+  Def->eraseFromParent();
+  MBBI->eraseFromParent();
+
+  return true;
+}
+
 bool AArch64ExpandPseudo::expandSetTagLoop(
     MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI) { @@ -1012,10 +1118,13 @@ int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode()); if (OrigInstr != -1) { auto &Orig = TII->get(OrigInstr); - if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) - != AArch64::NotDestructive) { + if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) != + AArch64::NotDestructive) { return expand_DestructiveOp(MI, MBB, MBBI); } + } else if ((MI.getDesc().TSFlags & AArch64::DestructiveInstTypeMask) == + AArch64::DestructiveUnaryPassthru) { + return foldUnary(MI, MBB, MBBI); } switch (Opcode) { Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm-zeroing.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve2-intrinsics-fp-int-binary-logarithm-zeroing.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=1 < %s | FileCheck %s + +; +; FLOGB +; + +; NOTE: The %unused paramter ensures z0 is free, leading to a simpler test. 
+define <vscale x 8 x i16> @flogb_f16(<vscale x 8 x half> %unused, <vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: flogb_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    flogb z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16> zeroinitializer,
+                                                                 <vscale x 8 x i1> %pg,
+                                                                 <vscale x 8 x half> %a)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @flogb_f32(<vscale x 4 x float> %unused, <vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: flogb_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    flogb z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> zeroinitializer,
+                                                                 <vscale x 4 x i1> %pg,
+                                                                 <vscale x 4 x float> %a)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @flogb_f64(<vscale x 2 x double> %unused, <vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: flogb_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    flogb z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64> zeroinitializer,
+                                                                 <vscale x 2 x i1> %pg,
+                                                                 <vscale x 2 x double> %a)
+  ret <vscale x 2 x i64> %out
+}
+
+;
+; FNEG
+;
+
+define <vscale x 4 x float> @fneg_f32(<vscale x 4 x float> %unused, <vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fneg_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    fneg z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> zeroinitializer,
+                                                                  <vscale x 4 x i1> %pg,
+                                                                  <vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.flogb.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.flogb.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)