Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -257,6 +257,13 @@
                            BranchProbability Probability) const override;
   bool PredicateInstruction(MachineInstr &MI,
                             ArrayRef<MachineOperand> Pred) const override;
+  bool analyzeSelect(const MachineInstr &MI,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     unsigned &TrueOp, unsigned &FalseOp,
+                     bool &Optimizable) const override;
+  MachineInstr *optimizeSelect(MachineInstr &MI,
+                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+                               bool) const override;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc) const override;
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -767,6 +767,90 @@
   return false;
 }
 
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> MultipleCmpOpUsers("peep-multiple-users", cl::init(true));
+
+bool SystemZInstrInfo::analyzeSelect(const MachineInstr &MI,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     unsigned &TrueOp, unsigned &FalseOp,
+                                     bool &Optimizable) const {
+  assert(MI.getDesc().isSelect() && "MI must be a select instruction");
+  unsigned Opc = MI.getOpcode();
+  if (Opc == SystemZ::LOCHIMux || Opc == SystemZ::LOCGHI) {
+    Optimizable = true;
+    return false;
+  }
+  return true;
+}
+
+MachineInstr *SystemZInstrInfo::
+optimizeSelect(MachineInstr &MI,
+               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+               bool) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+  unsigned Opc = MI.getOpcode();
+  assert(MI.getDesc().isSelect() && "MI must be a select instruction");
+  assert((Opc == SystemZ::LOCHIMux || Opc == SystemZ::LOCGHI) &&
+         "Unexpected opcode");
+
+  // Check that the conditionally loaded value is 1.
+  if (MI.getOperand(2).getImm() != 1)
+    return nullptr;
+
+  // Check that the incoming source value is a loaded immediate zero.
+  MachineInstr *SrcMI = MRI->getVRegDef(MI.getOperand(1).getReg());
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if ((SrcOpc != SystemZ::LHIMux && SrcOpc != SystemZ::LGHI) ||
+      SrcMI->getOperand(1).getImm() != 0)
+    return nullptr;
+
+  // Scan backwards in MBB and find the CC definition.
+  MachineInstr *CmpMI = nullptr;
+  for (MachineBasicBlock::iterator II = MI.getIterator();
+       II != MBB->begin();)
+    if ((--II)->definesRegister(SystemZ::CC)) {
+      CmpMI = &*II;
+      break;
+    }
+  if (CmpMI == nullptr)
+    return nullptr;
+  unsigned CmpOpcode = CmpMI->getOpcode();
+  if (CmpOpcode != SystemZ::CGHI &&
+      (CmpOpcode != SystemZ::CHIMux || Opc != SystemZ::LOCHIMux))
+    return nullptr;
+
+  // Check for a reusable known 0 or 1.
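+  // Two patterns are handled: with an NE mask and a compare against 0, the
+  // compare operand is itself the required 0 when the conditional load is
+  // not taken; with an EQ mask and a compare against 1, it is the required
+  // 1, and the immediate and mask are inverted below so it can be reused.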
+  int64_t CmpImm = CmpMI->getOperand(1).getImm();
+  int64_t CCMask = MI.getOperand(4).getImm();
+  bool NE0Case = CCMask == SystemZ::CCMASK_CMP_NE && CmpImm == 0;
+  bool EQ1Case = CCMask == SystemZ::CCMASK_CMP_EQ && CmpImm == 1;
+  if (!NE0Case && !EQ1Case)
+    return nullptr;
+
+  MachineOperand &CmpSrcMO = CmpMI->getOperand(0);
+  if (!MRI->hasOneNonDBGUse(CmpSrcMO.getReg()) && !MultipleCmpOpUsers)
+    return nullptr;
+
+  MachineInstrBuilder MIB =
+      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(MI.getOpcode()))
+          .add(MI.getOperand(0))
+          .add(CmpSrcMO)
+          .add(MI.getOperand(2))
+          .add(MI.getOperand(3))
+          .add(MI.getOperand(4));
+  CmpSrcMO.setIsKill(false);
+  if (CmpOpcode == SystemZ::CGHI && MI.getOpcode() == SystemZ::LOCHIMux)
+    MIB->getOperand(1).setSubReg(SystemZ::subreg_l32);
+  if (EQ1Case) {
+    MIB->getOperand(2).setImm(0);
+    MIB->getOperand(4).setImm(SystemZ::CCMASK_CMP_NE);
+  }
+
+  return MIB;
+}
+
 void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const DebugLoc &DL, MCRegister DestReg,
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -551,10 +551,12 @@
   // by the PeepholeOptimizer via FoldImmediate.
 
   // Expands to LOCHI or LOCHHI, depending on the choice of register.
-  def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
+  let isSelect = 1 in
+  def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
   defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
   defm LOCHI  : CondBinaryRIEPair<"lochi",  0xEC42, GR32, imm32sx16>;
-  defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
+  let isSelect = 1 in
+  defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
 
   // Move register on condition.  Matched via DAG pattern and
   // created by early if-conversion.
Index: llvm/test/CodeGen/SystemZ/setcc-05.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/setcc-05.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Test SETCC for an integer comparison against 0 or 1. The 0 does not need to
+; be loaded if the condition is NE, nor the 1 if it is EQ.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; ICMP NE 0: no need to load 0.
+define i32 @fun0(i8 zeroext %b) {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 0: need to load 0.
+define i32 @fun2(i8 zeroext %b) {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochie %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: The whole register is not checked, so need to load 0.
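+; (The compare is emitted as a test-under-mask (TMLL) rather than CHIMux,
+; which optimizeSelect does not handle, so the LHI of 0 remains.)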
+define i32 @fun3(i32 %b) {
+; CHECK-LABEL: fun3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    tmll %r2, 255
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochine %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %t = trunc i32 %b to i8
+  %cc = icmp ne i8 %t, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 with i32 use
+define i32 @fun4(i64 %b) {
+; CHECK-LABEL: fun4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 with i64 use.
+define i64 @fun5(i64 %b) {
+; CHECK-LABEL: fun5:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    locghilh %r2, 1
+; CHECK-NEXT:    br %r14
+bb:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}
+
+; ICMP EQ 1: no need to load 1.
+define i32 @fun6(i8 zeroext %b) {
+; CHECK-LABEL: fun6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 1: need to load 1.
+define i32 @fun7(i8 zeroext %b) {
+; CHECK-LABEL: fun7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 with i32 use
+define i32 @fun8(i64 %b) {
+; CHECK-LABEL: fun8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 with i64 use
+define i64 @fun9(i64 %b) {
+; CHECK-LABEL: fun9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    locghilh %r2, 0
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}