Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -918,8 +918,9 @@
   /// analyzeSelect and returned Optimizable = true, attempt to optimize MI by
   /// merging it with one of its operands. Returns NULL on failure.
   ///
-  /// When successful, returns the new select instruction. The client is
-  /// responsible for deleting MI.
+  /// When successful, returns the resulting select instruction, which may be
+  /// MI itself, modified in place. The client is responsible for deleting MI
+  /// unless MI is the instruction that was returned.
   ///
   /// If both sides of the select can be optimized, PreferFalse is used to pick
   /// a side.
Index: llvm/lib/CodeGen/PeepholeOptimizer.cpp
===================================================================
--- llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -638,12 +638,15 @@
     return false;
   if (!Optimizable)
     return false;
-  if (!TII->optimizeSelect(MI, LocalMIs))
-    return false;
-  LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
-  MI.eraseFromParent();
-  ++NumSelects;
-  return true;
+  if (MachineInstr *NewSelect = TII->optimizeSelect(MI, LocalMIs)) {
+    if (NewSelect != &MI) {
+      LLVM_DEBUG(dbgs() << "Deleting select: " << MI);
+      MI.eraseFromParent();
+    }
+    ++NumSelects;
+    return true;
+  }
+  return false;
 }
 
 /// Check if a simpler conditional branch can be generated.
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -257,6 +257,13 @@
                            BranchProbability Probability) const override;
   bool PredicateInstruction(MachineInstr &MI,
                             ArrayRef<MachineOperand> Pred) const override;
+  bool analyzeSelect(const MachineInstr &MI,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     unsigned &TrueOp, unsigned &FalseOp,
+                     bool &Optimizable) const override;
+  MachineInstr *optimizeSelect(MachineInstr &MI,
+                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+                               bool) const override;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc) const override;
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -767,6 +767,136 @@
   return false;
 }
 
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> MultipleCmpOpUsers("peep-multiple-users", cl::init(true));
+static cl::opt<bool> SearchImmLoad("peep-search-immload", cl::init(false));
+
+/// Matcher for a select of the immediate 1 (LOCHIMux / LOCGHI) whose source
+/// register holds 0 and whose condition comes from a compare with immediate.
+/// Two cases are recognized, in which the compared register can stand in for
+/// the zero source:
+///  - NE0Case: compare with 0, condition mask NE (the register is 0 whenever
+///    1 is not selected).
+///  - EQ1Case: compare with 1, condition mask EQ (the register is 1 whenever
+///    1 is selected, so the select can be inverted to load 0 on NE).
+struct Select01 {
+  int64_t CCMask = 0;
+  Register CmpReg = 0;
+  const MachineInstr *CmpMI = nullptr;
+  bool NE0Case = false;
+  bool EQ1Case = false;
+
+  /// Scan backwards from the select \p MI within its basic block for the
+  /// instruction defining CC. Returns true, with the members filled in, if
+  /// that instruction and the select form one of the supported cases.
+  bool findInstructions(const MachineInstr &MI) {
+    const MachineBasicBlock *MBB = MI.getParent();
+    const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+    unsigned Opc = MI.getOpcode();
+    assert(MI.getDesc().isSelect() && "MI must be a select instruction");
+    assert((Opc == SystemZ::LOCHIMux || Opc == SystemZ::LOCGHI) &&
+           "Unexpected opcode");
+    // Only a select of the immediate 1 is handled.
+    if (MI.getOperand(2).getImm() != 1)
+      return false;
+    CCMask = MI.getOperand(4).getImm();
+    Register SrcReg = MI.getOperand(1).getReg();
+    bool SrcIsZero = false;
+    for (MachineBasicBlock::const_iterator II = MI.getIterator();
+         II != MBB->begin();) {
+      unsigned CurrOpc = (--II)->getOpcode();
+      if ((CurrOpc == SystemZ::LHIMux || CurrOpc == SystemZ::LGHI) &&
+          II->getOperand(0).getReg() == SrcReg &&
+          II->getOperand(1).getImm() == 0)
+        SrcIsZero = true;
+      else if (II->definesRegister(SystemZ::CC)) {
+        // Optionally accept a zero load that was not seen by the scan above.
+        // getVRegDef() only applies to virtual registers and returns null
+        // when the register has no definition, so guard both.
+        if (!SrcIsZero && SearchImmLoad && SrcReg.isVirtual()) {
+          if (const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg)) {
+            unsigned SrcOpc = SrcMI->getOpcode();
+            if ((SrcOpc == SystemZ::LHIMux || SrcOpc == SystemZ::LGHI) &&
+                SrcMI->getOperand(1).getImm() == 0)
+              SrcIsZero = true;
+          }
+        }
+        // CC must come from a compare with immediate: CGHI is accepted for
+        // either select, CHIMux only together with LOCHIMux.
+        if (!SrcIsZero ||
+            (CurrOpc != SystemZ::CGHI &&
+             (CurrOpc != SystemZ::CHIMux || Opc != SystemZ::LOCHIMux)))
+          return false;
+        CmpMI = &*II;
+        int64_t CmpImm = II->getOperand(1).getImm();
+        NE0Case = CCMask == SystemZ::CCMASK_CMP_NE && CmpImm == 0;
+        EQ1Case = CCMask == SystemZ::CCMASK_CMP_EQ && CmpImm == 1;
+        if (!NE0Case && !EQ1Case)
+          return false;
+        CmpReg = II->getOperand(0).getReg();
+        if (!MRI->hasOneNonDBGUse(CmpReg) && !MultipleCmpOpUsers)
+          return false;
+        return true; // XXX Check if all Imm-Load users optimizable?
+      }
+    }
+
+    return false;
+  }
+};
+
+/// Report whether the select \p MI matches a pattern that optimizeSelect()
+/// can rewrite. Returns false and sets \p Optimizable on a match, and true
+/// otherwise. \p Cond, \p TrueOp and \p FalseOp are not written.
+bool SystemZInstrInfo::analyzeSelect(const MachineInstr &MI,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     unsigned &TrueOp, unsigned &FalseOp,
+                                     bool &Optimizable) const {
+  Select01 S;
+  if (S.findInstructions(MI)) {
+    Optimizable = true;
+    return false;
+  }
+  return true;
+}
+
+/// Rewrite the select \p MI in place so that it reads the register tested by
+/// the preceding compare instead of a separately loaded zero. In the EQ1 case
+/// the select is additionally inverted to load 0 on NE. Returns \p MI itself,
+/// so the caller must not erase it.
+MachineInstr *SystemZInstrInfo::
+optimizeSelect(MachineInstr &MI,
+               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+               bool) const {
+  Select01 S;
+  bool Found = S.findInstructions(MI);
+  (void)Found;
+  assert(Found && "Bad analyzeSelect() / optimizeSelect() calls.");
+  const MachineOperand &CmpSrcMO = S.CmpMI->getOperand(0);
+  MachineOperand &LOCSrcMO = MI.getOperand(1);
+
+  LOCSrcMO.setReg(CmpSrcMO.getReg());
+  // The compared register is now read by the select as well. It may have
+  // other users, so drop the kill flags on all of its uses rather than only
+  // on the compare.
+  MI.getMF()->getRegInfo().clearKillFlags(CmpSrcMO.getReg());
+  if (MI.getOpcode() == SystemZ::LOCHIMux) {
+    unsigned CmpOpc = S.CmpMI->getOpcode();
+    if (CmpOpc == SystemZ::CHIMux)
+      LOCSrcMO.setSubReg(CmpSrcMO.getSubReg());
+    else {
+      assert(CmpOpc == SystemZ::CGHI && "Unexpected compare opcode.");
+      LOCSrcMO.setSubReg(SystemZ::subreg_l32);
+    }
+  }
+  if (S.EQ1Case) {
+    MI.getOperand(2).setImm(0);
+    MI.getOperand(4).setImm(SystemZ::CCMASK_CMP_NE);
+  }
+
+  return &MI;
+}
+
 void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const DebugLoc &DL, MCRegister DestReg,
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.td +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.td @@ -551,10 +551,12 @@ // by the PeepholeOptimizer via FoldImmediate. // Expands to LOCHI or LOCHHI, depending on the choice of register. 
-  def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
+  let isSelect = 1 in
+    def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
   defm LOCHHI  : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
   defm LOCHI   : CondBinaryRIEPair<"lochi",  0xEC42, GR32,  imm32sx16>;
-  defm LOCGHI  : CondBinaryRIEPair<"locghi", 0xEC46, GR64,  imm64sx16>;
+  let isSelect = 1 in
+    defm LOCGHI  : CondBinaryRIEPair<"locghi", 0xEC46, GR64,  imm64sx16>;
 
   // Move register on condition.  Matched via DAG pattern and
   // created by early if-conversion.
Index: llvm/test/CodeGen/SystemZ/setcc-05.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/setcc-05.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Test SETCC for an integer comparison against 0 or 1. The initial load of 0
+; is not needed for the NE 0 and EQ 1 cases.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; ICMP NE 0: no need to load 0.
+define i32 @fun0(i8 zeroext %b) {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 0: need to load 0.
+define i32 @fun2(i8 zeroext %b) {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochie %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: The whole register is not checked, so need to load 0.
+define i32 @fun3(i32 %b) {
+; CHECK-LABEL: fun3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    tmll %r2, 255
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochine %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %t = trunc i32 %b to i8
+  %cc = icmp ne i8 %t, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 compare with i32 result.
+define i32 @fun4(i64 %b) {
+; CHECK-LABEL: fun4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 compare with i64 result.
+define i64 @fun5(i64 %b) {
+; CHECK-LABEL: fun5:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    locghilh %r2, 1
+; CHECK-NEXT:    br %r14
+bb:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}
+
+; ICMP EQ 1: no need to load 0 first; 0 is loaded on NE instead of 1 on EQ.
+define i32 @fun6(i8 zeroext %b) {
+; CHECK-LABEL: fun6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 1: the 0 still needs to be loaded.
+define i32 @fun7(i8 zeroext %b) {
+; CHECK-LABEL: fun7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 compare with i32 result.
+define i32 @fun8(i64 %b) {
+; CHECK-LABEL: fun8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 compare with i64 result.
+define i64 @fun9(i64 %b) {
+; CHECK-LABEL: fun9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    locghilh %r2, 0
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}