Index: llvm/lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -3436,7 +3436,7 @@
 
 multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
                              RegisterOperand cls, ImmOpWithPattern imm> {
-  let isCodeGenOnly = 1 in
+  let isCodeGenOnly = 1, NumOpsKey = mnemonic, NumOpsValue = "2" in
     def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
   def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
 }
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -185,6 +185,8 @@
                        unsigned HighOpcode) const;
   void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
                        unsigned HighOpcode) const;
+  void expandLOCImmPseudo(MachineInstr &MI, unsigned Opcode,
+                          unsigned ImmLoadOpc) const;
   void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
                         unsigned Size) const;
   void expandLoadStackGuard(MachineInstr *MI) const;
@@ -257,6 +259,13 @@
                                  BranchProbability Probability) const override;
   bool PredicateInstruction(MachineInstr &MI,
                             ArrayRef<MachineOperand> Pred) const override;
+  bool analyzeSelect(const MachineInstr &MI,
+                     SmallVectorImpl<MachineOperand> &Cond,
+                     unsigned &TrueOp, unsigned &FalseOp,
+                     bool &Optimizable) const override;
+  MachineInstr *optimizeSelect(MachineInstr &MI,
+                               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+                               bool) const override;
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                    bool KillSrc) const override;
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -191,6 +191,28 @@
   MI.setDesc(get(Opcode));
 }
 
+// MI is a 3-address conditional load immediate pseudo (see optimizeSelect()).
+// If the destination and source registers differ, first load the complement
+// (0 or 1) of MI's immediate into the destination with ImmLoadOpc, and then
+// replace MI with the 2-address Opcode, tying the source to the destination.
+void SystemZInstrInfo::expandLOCImmPseudo(MachineInstr &MI, unsigned Opcode,
+                                          unsigned ImmLoadOpc) const {
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  if (DstReg != SrcReg) {
+    MachineInstr *BuiltMI =
+      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ImmLoadOpc), DstReg)
+      .addImm(MI.getOperand(2).getImm() == 0 ? 1 : 0);
+    if (BuiltMI->isPseudo())
+      expandPostRAPseudo(*BuiltMI);
+    MI.getOperand(1).setReg(DstReg);
+  }
+  MI.setDesc(get(Opcode));
+  MI.tieOperands(0, 1);
+  if (MI.isPseudo())
+    expandPostRAPseudo(MI);
+}
+
 // MI is an RR-style pseudo instruction that zero-extends the low Size bits
 // of one GRX32 into another.  Replace it with LowOpcode if both operands
 // are low registers, otherwise use RISB[LH]G.
@@ -767,6 +789,110 @@
   return false;
 }
 
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> MultipleCmpOpUsers("peep-multiple-users", cl::init(true));
+static cl::opt<bool> LOCIPseudos("peep-pseudo", cl::init(false));
+
+// Report LOCHIMux and LOCGHI as selects that optimizeSelect() may be able to
+// improve.  Cond, TrueOp and FalseOp are not set.
+bool SystemZInstrInfo::analyzeSelect(const MachineInstr &MI,
+                                     SmallVectorImpl<MachineOperand> &Cond,
+                                     unsigned &TrueOp, unsigned &FalseOp,
+                                     bool &Optimizable) const {
+  assert(MI.getDesc().isSelect() && "MI must be a select instruction");
+  unsigned Opc = MI.getOpcode();
+  if (Opc == SystemZ::LOCHIMux || Opc == SystemZ::LOCGHI) {
+    Optimizable = true;
+    return false;
+  }
+  return true;
+}
+
+// Try to replace the sequence "load 0; compare; load 1 on condition" with a
+// conditional load that reuses the compared register as its source, when the
+// compare already guarantees that register holds the wanted 0 (NE 0 case) or
+// 1 (EQ 1 case, done as an inverted conditional load of 0).  Returns the new
+// instruction, or nullptr if MI was not changed.
+MachineInstr *SystemZInstrInfo::
+optimizeSelect(MachineInstr &MI,
+               SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+               bool) const {
+  MachineBasicBlock *MBB = MI.getParent();
+  const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
+  unsigned Opc = MI.getOpcode();
+  assert(MI.getDesc().isSelect() && "MI must be a select instruction");
+  assert((Opc == SystemZ::LOCHIMux || Opc == SystemZ::LOCGHI) &&
+         "Unexpected opcode");
+
+  // Check that the conditionally loaded value is 1.
+  if (MI.getOperand(2).getImm() != 1)
+    return nullptr;
+
+  // Check that the incoming source value is a loaded immediate zero.
+  Register SrcReg = MI.getOperand(1).getReg();
+  MachineInstr *SrcMI = SrcReg.isVirtual() ? MRI->getVRegDef(SrcReg) : nullptr;
+  if (SrcMI == nullptr)
+    return nullptr;
+  unsigned SrcOpc = SrcMI->getOpcode();
+  if ((SrcOpc != SystemZ::LHIMux && SrcOpc != SystemZ::LGHI) ||
+      SrcMI->getOperand(1).getImm() != 0)
+    return nullptr;
+
+  // Scan backwards in MBB and find the CC definition.
+  MachineInstr *CmpMI = nullptr;
+  for (MachineBasicBlock::iterator II = MI.getIterator();
+       II != MBB->begin();)
+    if ((--II)->definesRegister(SystemZ::CC)) {
+      CmpMI = &*II;
+      break;
+    }
+  if (CmpMI == nullptr)
+    return nullptr;
+  unsigned CmpOpcode = CmpMI->getOpcode();
+  if (CmpOpcode != SystemZ::CGHI &&
+      (CmpOpcode != SystemZ::CHIMux || Opc != SystemZ::LOCHIMux))
+    return nullptr;
+
+  // Check for a reusable known 0 or 1.
+  int64_t CmpImm = CmpMI->getOperand(1).getImm();
+  int64_t CCMask = MI.getOperand(4).getImm();
+  bool NE0Case = CCMask == SystemZ::CCMASK_CMP_NE && CmpImm == 0;
+  bool EQ1Case = CCMask == SystemZ::CCMASK_CMP_EQ && CmpImm == 1;
+  if (!NE0Case && !EQ1Case)
+    return nullptr;
+
+  MachineOperand &CmpSrcMO = CmpMI->getOperand(0);
+  if (!MRI->hasOneNonDBGUse(CmpSrcMO.getReg()) && !MultipleCmpOpUsers)
+    return nullptr;
+
+  unsigned PseudoOpc =
+    MI.getOpcode() == SystemZ::LOCGHI ? SystemZ::LOCGHI_Pseudo_3
+                                      : SystemZ::LOCHIMux_Pseudo_3;
+  if (!LOCIPseudos)
+    PseudoOpc = MI.getOpcode();
+  MachineInstrBuilder MIB =
+    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PseudoOpc))
+    .add(MI.getOperand(0))
+    .add(CmpSrcMO)
+    .add(MI.getOperand(2))
+    .add(MI.getOperand(3))
+    .add(MI.getOperand(4));
+  // The compared register is now also used by the new instruction, so any
+  // kill flag on an earlier use (the compare or another user) is stale.
+  MRI->clearKillFlags(CmpSrcMO.getReg());
+  if (CmpOpcode == SystemZ::CGHI && MI.getOpcode() == SystemZ::LOCHIMux)
+    MIB->getOperand(1).setSubReg(SystemZ::subreg_l32);
+  if (EQ1Case) {
+    MIB->getOperand(2).setImm(0);
+    MIB->getOperand(4).setImm(SystemZ::CCMASK_CMP_NE);
+  }
+
+  // Record the new instruction in the caller's set of seen instructions.
+  SeenMIs.insert(MIB);
+  return MIB;
+}
+
 void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const DebugLoc &DL, MCRegister DestReg,
@@ -1384,6 +1510,14 @@
     expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI);
     return true;
 
+  case SystemZ::LOCGHI_Pseudo_3:
+    expandLOCImmPseudo(MI, SystemZ::LOCGHI, SystemZ::LGHI);
+    return true;
+
+  case SystemZ::LOCHIMux_Pseudo_3:
+    expandLOCImmPseudo(MI, SystemZ::LOCHIMux, SystemZ::LHIMux);
+    return true;
+
   case SystemZ::STCMux:
     expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
     return true;
Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.td
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -551,10 +551,23 @@
   // by the PeepholeOptimizer via FoldImmediate.
 
   // Expands to LOCHI or LOCHHI, depending on the choice of register.
-  def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
+  let isSelect = 1, NumOpsKey = "lochimux", NumOpsValue = "2" in
+    def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
   defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
   defm LOCHI  : CondBinaryRIEPair<"lochi",  0xEC42, GR32,  imm32sx16>;
-  defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64,  imm64sx16>;
+  let isSelect = 1 in
+    defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64,  imm64sx16>;
+
+  // 3-address pseudos inserted by optimizeSelect() in certain cases.
+  // TODO: Merge these definitions with the class for LOCHIMux...
+  let CCMaskLast = 1, NumOpsValue = "3" in {
+    let NumOpsKey = "locghi" in
+    def LOCGHI_Pseudo_3 : Pseudo<(outs GR64:$R1),
+                 (ins GR64:$R2, imm64sx16:$I3, cond4:$valid, cond4:$M4), []>;
+    let NumOpsKey = "lochimux" in
+    def LOCHIMux_Pseudo_3 : Pseudo<(outs GRX32:$R1),
+                 (ins GRX32:$R2, imm32sx16:$I3, cond4:$valid, cond4:$M4), []>;
+  }
 
   // Move register on condition.  Matched via DAG pattern and
   // created by early if-conversion.
Index: llvm/test/CodeGen/SystemZ/setcc-05.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/setcc-05.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Test SETCC for an integer comparison against 0 or 1. The compared register
+; is reused instead of loading 0 (NE 0) or 1 (EQ 1) where possible.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; ICMP NE 0: no need to load 0.
+define i32 @fun0(i8 zeroext %b) {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 0: need to load 0.
+define i32 @fun2(i8 zeroext %b) {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochie %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: The whole register is not checked, so need to load 0.
+define i32 @fun3(i32 %b) {
+; CHECK-LABEL: fun3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    tmll %r2, 255
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochine %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %t = trunc i32 %b to i8
+  %cc = icmp ne i8 %t, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 compare with i32 use of the result.
+define i32 @fun4(i64 %b) {
+; CHECK-LABEL: fun4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 compare with i64 use of the result.
+define i64 @fun5(i64 %b) {
+; CHECK-LABEL: fun5:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    locghilh %r2, 1
+; CHECK-NEXT:    br %r14
+bb:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}
+
+; ICMP EQ 1: no need to load 1.
+define i32 @fun6(i8 zeroext %b) {
+; CHECK-LABEL: fun6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 1: no known 0 or 1 to reuse, so 0 is loaded before the select of 1.
+define i32 @fun7(i8 zeroext %b) {
+; CHECK-LABEL: fun7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 compare with i32 use of the result.
+define i32 @fun8(i64 %b) {
+; CHECK-LABEL: fun8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 compare with i64 use of the result.
+define i64 @fun9(i64 %b) {
+; CHECK-LABEL: fun9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    locghilh %r2, 0
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}