Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -2519,6 +2519,7 @@
          [(set regtype:$Rd,
                (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>,
       Sched<[WriteI, ReadI, ReadI]> {
+  let isSelect = 1;
   let Uses = [NZCV];
 
   bits<5> Rd;
@@ -4537,6 +4538,8 @@
                          (AArch64csel (vt regtype:$Rn), regtype:$Rm,
                           (i32 imm:$cond), NZCV))]>,
       Sched<[WriteF]> {
+  let isSelect = 1;
+
   bits<5> Rd;
   bits<5> Rn;
   bits<5> Rm;
Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -187,6 +187,12 @@
                     const DebugLoc &DL, unsigned DstReg,
                     ArrayRef<MachineOperand> Cond, unsigned TrueReg,
                     unsigned FalseReg) const override;
+  bool analyzeSelect(const MachineInstr &MI,
+                     SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp,
+                     unsigned &FalseOp, bool &Optimizable) const override;
+  MachineInstr *optimizeSelect(MachineInstr &MI,
+                               SmallPtrSetImpl<MachineInstr *> &NewMIs,
+                               bool PreferFalse = false) const override;
   void getNoop(MCInst &NopInst) const override;
 
   /// analyzeCompare - For a comparison instruction, return the source registers
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -662,6 +662,109 @@
       .addImm(CC);
 }
 
+/// Returns true if the (F)CSEL \p MI selects between two sources that hold
+/// the same value, so that it can be replaced by a plain COPY.
+static bool canFoldIntoCOPY(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+  case AArch64::FCSELHrrr:
+  case AArch64::FCSELSrrr:
+  case AArch64::FCSELDrrr:
+  case AArch64::CSELWr:
+  case AArch64::CSELXr: {
+    const MachineOperand &TrueOperand = MI.getOperand(1);
+    const MachineOperand &FalseOperand = MI.getOperand(2);
+    if (!TrueOperand.isReg() || !FalseOperand.isReg())
+      return false;
+
+    // optimizeSelect() rebuilds the instruction from the bare registers, so
+    // a sub-register index on either source could not be preserved.
+    if (TrueOperand.getSubReg() || FalseOperand.getSubReg())
+      return false;
+
+    // Reading the very same register twice trivially yields one value.
+    const unsigned TrueReg = TrueOperand.getReg();
+    const unsigned FalseReg = FalseOperand.getReg();
+    if (TrueReg == FalseReg)
+      return true;
+
+    const MachineFunction &MF = *MI.getParent()->getParent();
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    // Otherwise compare what removeCopies() resolves each source to. Only a
+    // common virtual register is accepted: a physical register may be
+    // redefined between the two copies that read it, so matching physical
+    // sources do not prove that the values are equal.
+    const unsigned TrueSrc = removeCopies(MRI, TrueReg);
+    return TargetRegisterInfo::isVirtualRegister(TrueSrc) &&
+           TrueSrc == removeCopies(MRI, FalseReg);
+  }
+  }
+}
+
+/// Implements TargetInstrInfo::analyzeSelect for (F)CSEL instructions whose
+/// two sources are known to hold the same value.
+///
+/// \returns true if \p MI cannot be optimized. Otherwise returns false after
+/// setting \p TrueOp, \p FalseOp and \p Optimizable and appending the
+/// condition operands to \p Cond.
+bool AArch64InstrInfo::analyzeSelect(const MachineInstr &MI,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     unsigned &TrueOp, unsigned &FalseOp,
+                                     bool &Optimizable) const {
+  assert(MI.isSelect() && "Expected a select instruction");
+  if (!canFoldIntoCOPY(MI))
+    return true;
+  // CSEL operands:
+  // 0: Def.
+  // 1: True use.
+  // 2: False use.
+  // 3: Condition code.
+  // 4: NZCV implicit use.
+  TrueOp = 1;
+  FalseOp = 2;
+  // Everything after the two sources (the condition code and the implicit
+  // NZCV use) forms the condition; the false source itself is not part of it.
+  for (unsigned OpIdx = FalseOp + 1; OpIdx < MI.getNumOperands(); ++OpIdx)
+    Cond.push_back(MI.getOperand(OpIdx));
+  Optimizable = true;
+  return false;
+}
+
+/// Replaces the (F)CSEL \p MI, whose two sources hold the same value, with a
+/// COPY of one of them into the same destination register.
+///
+/// The COPY is inserted in front of \p MI and recorded in \p NewMIs. \p MI is
+/// removed from \p NewMIs but is not erased from its block here.
+/// \p PreferFalse selects the false source instead of the true one.
+/// \returns the newly created COPY.
+MachineInstr *
+AArch64InstrInfo::optimizeSelect(MachineInstr &MI,
+                                 SmallPtrSetImpl<MachineInstr *> &NewMIs,
+                                 bool PreferFalse) const {
+  assert(MI.isSelect() && "Expected a select instruction");
+  // The extra parentheses attach the message to the whole disjunction rather
+  // than only to its last term.
+  assert((MI.getOpcode() == AArch64::CSELWr ||
+          MI.getOpcode() == AArch64::CSELXr ||
+          MI.getOpcode() == AArch64::FCSELHrrr ||
+          MI.getOpcode() == AArch64::FCSELSrrr ||
+          MI.getOpcode() == AArch64::FCSELDrrr) &&
+         "Unknown select instruction");
+  // Operand 1 is the true source, operand 2 the false source.
+  const unsigned SrcIdx = PreferFalse ? 2 : 1;
+  MachineInstrBuilder NewMI =
+      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY),
+              MI.getOperand(0).getReg())
+          .addReg(MI.getOperand(SrcIdx).getReg());
+
+  NewMIs.erase(&MI);
+  NewMIs.insert(NewMI);
+
+  return NewMI;
+}
+
 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
   uint64_t Imm = MI.getOperand(1).getImm();
Index: test/CodeGen/AArch64/csel-same-source.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/csel-same-source.mir
@@ -0,0 +1,45 @@
+#RUN: llc -mtriple=aarch64-- -start-before machine-cse -stop-after peephole-opt %s -o - | FileCheck %s --check-prefix=CSE
+#RUN: llc -mtriple=aarch64-- -run-pass peephole-opt %s -o - | FileCheck %s
+
+---
+name:            csel
+# CHECK-LABEL: name: csel
+# A coalescing pass is done from machine-cse, and it results in a CSEL with the
+# same true and false source. Make sure it gets eliminated.
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %0:gpr32 = IMPLICIT_DEF
+    $nzcv = IMPLICIT_DEF
+    %1:gpr32 = UBFMWri %0, 13, 31
+    %2:gpr32 = COPY %1
+    %3:gpr32 = UBFMWri %0, 13, 31
+    %4:gpr32 = COPY %3
+    %5:gpr32 = CSELWr %2, %4, 12, implicit $nzcv
+    ; CSE-NOT: = CSELWr
+    ; CSE: = COPY
+    ; CSE-NOT: = CSELWr
+    ; CSE: RET_ReallyLR
+    RET_ReallyLR
+...
+---
+name:            cselcopy
+# CHECK-LABEL: name: cselcopy
+# Make sure we eliminate CSELs through copies.
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    %0:gpr32 = IMPLICIT_DEF
+    $nzcv = IMPLICIT_DEF
+    %1:gpr32 = COPY %0
+    %2:gpr32 = COPY %0
+    %3:gpr32 = COPY %1
+    %4:gpr32 = COPY %2
+    %5:gpr32 = CSELWr %3, %4, 12, implicit $nzcv
+    ; CHECK-NOT: = CSELWr
+    ; CHECK: = COPY
+    ; CHECK-NOT: = CSELWr
+    ; CHECK: RET_ReallyLR
+    RET_ReallyLR
+...
+