diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -31,6 +31,12 @@
 //    If AArch64's 32-bit form of instruction defines the source operand of
 //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
 //    operand are set to zero.
+// 5. Remove redundant CSELs when performing safe integer division.
+//
+//     (CSEL 0 ({S|U}DIV x y) EQ (CMP y 0)) => ({S|U}DIV x y)
+//     (CSEL ({S|U}DIV x y) 0 NE (CMP y 0)) => ({S|U}DIV x y)
+//
+//    The CSEL is redundant, because {S|U}DIV returns 0 when the divisor is 0.
 //
 //===----------------------------------------------------------------------===//
 
@@ -97,6 +103,8 @@
                 SmallSetVector &ToBeRemoved);
   bool visitORR(MachineInstr &MI,
                 SmallSetVector &ToBeRemoved);
+  bool visitCSEL(MachineInstr &MI,
+                 SmallSetVector &ToBeRemoved);
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   StringRef getPassName() const override {
@@ -296,6 +304,133 @@
       });
 }
 
+/// Returns true if \p Opcode is a register-register integer division.
+static bool isDiv(unsigned Opcode) {
+  return Opcode == AArch64::UDIVWr || Opcode == AArch64::UDIVXr ||
+         Opcode == AArch64::SDIVWr || Opcode == AArch64::SDIVXr;
+}
+
+/// Returns true if \p Reg is one of the architectural zero registers.
+static bool isZeroReg(Register Reg) {
+  return Reg == AArch64::WZR || Reg == AArch64::XZR;
+}
+
+/// Returns the closest instruction before \p MI in its basic block that
+/// writes NZCV, or nullptr if there is none. NZCV is a physical register, so
+/// its reaching definition is found by scanning backwards instead of through
+/// the virtual-register def list.
+static MachineInstr *findNZCVDefBefore(MachineInstr &MI,
+                                       const TargetRegisterInfo *TRI) {
+  for (MachineInstr *I = MI.getPrevNode(); I; I = I->getPrevNode())
+    if (I->modifiesRegister(AArch64::NZCV, TRI))
+      return I;
+  return nullptr;
+}
+
+/// Returns true if the NZCV value written by \p FlagsDef may be observed by
+/// anything other than \p Consumer: another reader ahead of the next NZCV
+/// write in the block, or a successor block that lists NZCV as live-in.
+static bool isNZCVUsedElsewhere(MachineInstr &FlagsDef,
+                                const MachineInstr &Consumer,
+                                const TargetRegisterInfo *TRI) {
+  for (MachineInstr *I = FlagsDef.getNextNode(); I; I = I->getNextNode()) {
+    if (I != &Consumer && I->readsRegister(AArch64::NZCV, TRI))
+      return true;
+    if (I->modifiesRegister(AArch64::NZCV, TRI))
+      return false;
+  }
+  for (const MachineBasicBlock *Succ : FlagsDef.getParent()->successors())
+    if (Succ->isLiveIn(AArch64::NZCV))
+      return true;
+  return false;
+}
+
+/// Removes a CSEL that guards an integer division against a zero divisor:
+///
+///   (CSEL 0 ({S|U}DIV x y) EQ (CMP y 0)) => ({S|U}DIV x y)
+///   (CSEL ({S|U}DIV x y) 0 NE (CMP y 0)) => ({S|U}DIV x y)
+///
+/// \param MI the CSELWr/CSELXr to examine.
+/// \param ToBeRemoved collects \p MI and any feeding instruction that becomes
+///        dead, for deletion by the caller.
+/// \returns true if uses of the CSEL result were rewritten to the quotient.
+bool AArch64MIPeepholeOpt::visitCSEL(
+    MachineInstr &MI, SmallSetVector &ToBeRemoved) {
+  Register RetReg = MI.getOperand(0).getReg();
+  if (!RetReg.isVirtual())
+    return false;
+
+  // CSEL Rd, Rn, Rm, cond yields Rn when cond holds and Rm otherwise, so the
+  // zero must be on the side that is selected when the divisor is zero.
+  const MachineOperand *ZeroOp;
+  const MachineOperand *DivOp;
+  switch (MI.getOperand(3).getImm()) {
+  case AArch64CC::EQ:
+    ZeroOp = &MI.getOperand(1);
+    DivOp = &MI.getOperand(2);
+    break;
+  case AArch64CC::NE:
+    DivOp = &MI.getOperand(1);
+    ZeroOp = &MI.getOperand(2);
+    break;
+  default:
+    return false;
+  }
+
+  // Is the other side a division?
+  Register QuotientReg = DivOp->getReg();
+  if (!QuotientReg.isVirtual() || DivOp->getSubReg())
+    return false;
+  MachineInstr *DivDef = MRI->getUniqueVRegDef(QuotientReg);
+  if (!DivDef || !isDiv(DivDef->getOpcode()))
+    return false;
+  const MachineOperand &DivisorOp = DivDef->getOperand(2);
+
+  // Is the default value zero?
+  Register ZeroReg = ZeroOp->getReg();
+  MachineInstr *ZeroDef = nullptr;
+  if (!isZeroReg(ZeroReg)) {
+    if (!ZeroReg.isVirtual())
+      return false;
+    ZeroDef = MRI->getUniqueVRegDef(ZeroReg);
+    if (!ZeroDef || ZeroDef->getOpcode() != AArch64::COPY ||
+        !isZeroReg(ZeroDef->getOperand(1).getReg()))
+      return false;
+  }
+
+  // Is the divisor being compared against zero? Sub-register operands are
+  // rejected so the comparison covers every bit that the division reads.
+  MachineInstr *FlagsDef = findNZCVDefBefore(MI, TRI);
+  if (!FlagsDef || (FlagsDef->getOpcode() != AArch64::SUBSWri &&
+                    FlagsDef->getOpcode() != AArch64::SUBSXri))
+    return false;
+  const MachineOperand &ComparedOp = FlagsDef->getOperand(1);
+  if (DivisorOp.getSubReg() || ComparedOp.getSubReg() ||
+      ComparedOp.getReg() != DivisorOp.getReg() ||
+      FlagsDef->getOperand(2).getImm() != 0)
+    return false;
+
+  // The quotient takes over every use of the CSEL result, so it has to
+  // satisfy the register class those uses rely on.
+  if (!MRI->constrainRegClass(QuotientReg, MRI->getRegClass(RetReg)))
+    return false;
+  MRI->replaceRegWith(RetReg, QuotientReg);
+  MRI->clearKillFlags(QuotientReg);
+  ToBeRemoved.insert(&MI);
+
+  // Drop the feeding instructions only when nothing else depends on them.
+  if (ZeroDef && MRI->hasOneNonDBGUse(ZeroReg))
+    ToBeRemoved.insert(ZeroDef);
+  Register CmpDstReg = FlagsDef->getOperand(0).getReg();
+  bool CmpValueUnused =
+      isZeroReg(CmpDstReg) ||
+      (CmpDstReg.isVirtual() && MRI->use_nodbg_empty(CmpDstReg));
+  if (CmpValueUnused && !isNZCVUsedElsewhere(*FlagsDef, MI, TRI))
+    ToBeRemoved.insert(FlagsDef);
+
+  return true;
+}
+
 // Checks if the corresponding MOV immediate instruction is applicable for
 // this peephole optimization.
 bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
@@ -398,6 +533,9 @@
   if (skipFunction(MF.getFunction()))
     return false;
 
+  LLVM_DEBUG(dbgs() << "********** AArch64 Peephole Optimizer **********\n"
+                    << "********** Function: " << MF.getName() << '\n');
+
   TII = static_cast(MF.getSubtarget().getInstrInfo());
   TRI = static_cast(
       MF.getSubtarget().getRegisterInfo());
@@ -423,6 +561,10 @@
       case AArch64::ORRWrs:
         Changed = visitORR(MI, ToBeRemoved);
         break;
+      case AArch64::CSELWr:
+      case AArch64::CSELXr:
+        Changed = visitCSEL(MI, ToBeRemoved);
+        break;
       case AArch64::ADDWrr:
         Changed = visitADDSUB(AArch64::ADDWri, AArch64::SUBWri, MI,
                               ToBeRemoved);
diff --git a/llvm/test/CodeGen/AArch64/checked-int-div.ll b/llvm/test/CodeGen/AArch64/checked-int-div.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/checked-int-div.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-gnu-linux -mcpu=neoversen1 -o - | FileCheck %s
+
+; Ensure that `y == 0 ? 0 : x / y` is optimised to a single UDIV/SDIV - UDIV/SDIV return 0 when divisor is 0
+
+define i32 @u32_checked_div(i32 %0, i32 %1) {
+; CHECK-LABEL: u32_checked_div:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    udiv w0, w0, w1
+; CHECK-NEXT:    ret
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %6, label %4
+
+4:
+  %5 = udiv i32 %0, %1
+  br label %6
+
+6:
+  %7 = phi i32 [ %5, %4 ], [ 0, %2 ]
+  ret i32 %7
+}
+
+define i64 @u64_checked_div(i64 %0, i64 %1) {
+; CHECK-LABEL: u64_checked_div:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    udiv x0, x0, x1
+; CHECK-NEXT:    ret
+  %3 = icmp eq i64 %1, 0
+  br i1 %3, label %6, label %4
+
+4:
+  %5 = udiv i64 %0, %1
+  br label %6
+
+6:
+  %7 = phi i64 [ %5, %4 ], [ 0, %2 ]
+  ret i64 %7
+}
+
+define i32 @i32_checked_div(i32 %0, i32 %1) {
+; CHECK-LABEL: i32_checked_div:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sdiv w0, w0, w1
+; CHECK-NEXT:    ret
+  %3 = icmp eq i32 %1, 0
+  br i1 %3, label %6, label %4
+
+4:
+  %5 = sdiv i32 %0, %1
+  br label %6
+
+6:
+  %7 = phi i32 [ %5, %4 ], [ 0, %2 ]
+  ret i32 %7
+}
+
+define i64 @i64_checked_div(i64 %0, i64 %1) {
+; CHECK-LABEL: i64_checked_div:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sdiv x0, x0, x1
+; CHECK-NEXT:    ret
+  %3 = icmp eq i64 %1, 0
+  br i1 %3, label %6, label %4
+
+4:
+  %5 = sdiv i64 %0, %1
+  br label %6
+
+6:
+  %7 = phi i64 [ %5, %4 ], [ 0, %2 ]
+  ret i64 %7
+}