Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -636,6 +636,7 @@
                          bool Force) const;
   SDValue combineTruncateExtract(const SDLoc &DL, EVT TruncVT, SDValue Op,
                                  DAGCombinerInfo &DCI) const;
+  bool isSETCCWithReusable64BitOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue combineZERO_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2692,16 +2692,23 @@
   Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
 }
 
-// Return an i32 value that is 1 if the CC value produced by CCReg is
-// in the mask CCMask and 0 otherwise.  CC is known to have a value
-// in CCValid, so other values can be ignored.
+// Return a value that is 1 if the CC value produced by CCReg is in the mask
+// CCMask and 0 otherwise.  CC is known to have a value in CCValid, so other
+// values can be ignored.
 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
-                         unsigned CCValid, unsigned CCMask) {
-  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
-                   DAG.getConstant(0, DL, MVT::i32),
+                         unsigned CCValid, unsigned CCMask,
+                         SDValue ZeroOp = SDValue(), SDValue OneOp = SDValue()) {
+  EVT VT = ZeroOp != SDValue()  ? ZeroOp->getValueType(0)
+           : OneOp != SDValue() ? OneOp->getValueType(0)
+                                : MVT::i32;
+
+  SDValue Ops[] = {OneOp == SDValue() ? DAG.getConstant(1, DL, VT)
+                                      : OneOp,
+                   ZeroOp == SDValue() ? DAG.getConstant(0, DL, VT)
+                                       : ZeroOp,
                    DAG.getTargetConstant(CCValid, DL, MVT::i32),
                    DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
-  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
+  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
 }
 
 // Return the SystemISD vector comparison operation for CC, or 0 if it cannot
@@ -2909,11 +2916,36 @@
   return Cmp;
 }
 
+static bool isZeroExtended(SDValue Op) {
+  for (SDNode *Node : Op->uses())
+    if (Node->getOpcode() == ISD::ZERO_EXTEND)
+      return true;
+  return false;
+}
+
+// Need to avoid truncating an AssertSext to a narrower type.
+static SDValue getTruncatedCmpOp(SDValue Op, SelectionDAG &DAG) {
+  if (Op->getOpcode() == ISD::AssertSext)
+    Op = Op->getOperand(0);
+  return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), MVT::i32, Op);
+}
+
+// EXPERIMENTAL
+#include "llvm/Support/CommandLine.h"
+static cl::opt<bool> MultipleCmpOpUsers("setcc-multiple-users", cl::init(true));
+
+static bool cmpOpHasOneUser(SDValue Op) {
+  if (Op.hasOneUse() && isa(Op))
+    return false;
+  return (Op.hasOneUse() || MultipleCmpOpUsers);
+}
+
 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
                                           SelectionDAG &DAG) const {
   SDValue CmpOp0 = Op.getOperand(0);
   SDValue CmpOp1 = Op.getOperand(1);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  bool CmpOpOneUser = cmpOpHasOneUser(CmpOp0);
   SDLoc DL(Op);
   EVT VT = Op.getValueType();
   if (VT.isVector())
@@ -2921,7 +2953,49 @@
   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
   SDValue CCReg = emitCmp(DAG, DL, C);
-  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
+
+  // Determine if either 0 or 1 is the known value after the comparison
+  // and see if it can be reused instead of loading it again. For instance,
+  // in the case of a comparison against 0 for inequality, there is no need
+  // to load the zero constant into a register.
+  SDValue ZeroOp = SDValue();
+  SDValue OneOp = SDValue();
+  auto *Imm = dyn_cast<ConstantSDNode>(CmpOp1);
+  if (Imm != nullptr && C.Opcode == SystemZISD::ICMP && CmpOpOneUser) {
+    EVT CmpVT = CmpOp0->getValueType(0);
+    if (VT == MVT::i32 && !isZeroExtended(Op)) {
+      if (CC == ISD::SETNE && Imm->getZExtValue() == 0) {
+        if (CmpVT == MVT::i32)
+          ZeroOp = CmpOp0;
+        else if (CmpVT == MVT::i64)
+          // i64 comparison: all users i32.
+          ZeroOp = getTruncatedCmpOp(CmpOp0, DAG);
+      } else if (CC == ISD::SETEQ && Imm->getZExtValue() == 1) {
+        if (CmpVT == MVT::i32)
+          OneOp = CmpOp0;
+        else if (CmpVT == MVT::i64)
+          // i64 comparison: all users i32.
+          OneOp = getTruncatedCmpOp(CmpOp0, DAG);
+      }
+    } else if (VT == MVT::i64) {
+      // i64 comparison: all users i64.
+      if (CC == ISD::SETNE && Imm->getZExtValue() == 0)
+        ZeroOp = CmpOp0;
+      else if (CC == ISD::SETEQ && Imm->getZExtValue() == 1)
+        OneOp = CmpOp0;
+    }
+    if (ZeroOp != SDValue())
+      ZeroOp = DAG.getNode(ISD::AssertZext, DL, VT, ZeroOp,
+                           DAG.getValueType(MVT::i1));
+    if (OneOp != SDValue())
+      OneOp = DAG.getNode(ISD::AssertZext, DL, VT, OneOp,
+                          DAG.getValueType(MVT::i1));
+  }
+  assert((VT == MVT::i32 ||
+          (VT == MVT::i64 && ((ZeroOp != SDValue()) != (OneOp != SDValue())))) &&
+         "Unexpected i64 setcc");
+
+  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask, ZeroOp, OneOp);
 }
 
 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
@@ -5860,13 +5934,39 @@
   return SDValue();
 }
 
+bool SystemZTargetLowering::isSETCCWithReusable64BitOp(SDValue Op,
+                                                       SelectionDAG &DAG) const {
+  SDValue CmpOp0 = Op.getOperand(0);
+  SDValue CmpOp1 = Op.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  if (!cmpOpHasOneUser(CmpOp0))
+    return false;
+  SDLoc DL(Op);
+  EVT CmpVT = CmpOp0->getValueType(0);
+  if (CmpVT != MVT::i64)
+    return false;
+  auto *Imm = dyn_cast<ConstantSDNode>(CmpOp1);
+  if (!Imm)
+    return false;
+
+  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
+  if (C.Opcode != SystemZISD::ICMP || !Op.hasOneUse())
+    return false;
+
+  if ((CC == ISD::SETNE && Imm->getZExtValue() == 0) ||
+      (CC == ISD::SETEQ && Imm->getZExtValue() == 1))
+    return true;
+
+  return false;
+}
+
 SDValue SystemZTargetLowering::combineZERO_EXTEND(
     SDNode *N, DAGCombinerInfo &DCI) const {
   // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
   SelectionDAG &DAG = DCI.DAG;
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
-  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
+  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { // XXX still needed?
     auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
     auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
     if (TrueOp && FalseOp) {
@@ -5884,6 +5984,13 @@
       return NewSelect;
     }
   }
+
+  if (N0.getOpcode() == ISD::SETCC &&
+      VT == MVT::i64 && N0->getValueType(0) == MVT::i32 &&
+      isSETCCWithReusable64BitOp(N0, DAG))
+    return DAG.getNode(ISD::SETCC, SDLoc(N0), MVT::i64,
+                       N0->getOperand(0), N0->getOperand(1), N0->getOperand(2));
+
   return SDValue();
 }
 
Index: llvm/test/CodeGen/SystemZ/setcc-05.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/setcc-05.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Test SETCC for an integer comparison against 0. The 0 does not need to be
+; loaded if the condition is NE.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; ICMP NE 0: no need to load 0.
+define i32 @fun0(i8 zeroext %b) {
+; CHECK-LABEL: fun0:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 0: need to load 0.
+define i32 @fun2(i8 zeroext %b) {
+; CHECK-LABEL: fun2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 0
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochie %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: The whole register is not checked, so need to load 0.
+define i32 @fun3(i32 %b) {
+; CHECK-LABEL: fun3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    tmll %r2, 255
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochine %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %t = trunc i32 %b to i8
+  %cc = icmp ne i8 %t, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 with i32 use.
+define i32 @fun4(i64 %b) {
+; CHECK-LABEL: fun4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 0: i64 with i64 use.
+define i64 @fun5(i64 %b) {
+; CHECK-LABEL: fun5:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    cghi %r2, 0
+; CHECK-NEXT:    locghilh %r2, 1
+; CHECK-NEXT:    br %r14
+bb:
+  %cc = icmp ne i64 %b, 0
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}
+
+; ICMP EQ 1: no need to load 1.
+define i32 @fun6(i8 zeroext %b) {
+; CHECK-LABEL: fun6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP NE 1: need to load 1.
+define i32 @fun7(i8 zeroext %b) {
+; CHECK-LABEL: fun7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    chi %r2, 1
+; CHECK-NEXT:    lhi %r2, 0
+; CHECK-NEXT:    lochilh %r2, 1
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp ne i8 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 with i32 use.
+define i32 @fun8(i64 %b) {
+; CHECK-LABEL: fun8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    lochilh %r2, 0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    br %r14
+entry:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i32
+  ret i32 %conv
+}
+
+; ICMP EQ 1: i64 with i64 use.
+define i64 @fun9(i64 %b) {
+; CHECK-LABEL: fun9:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    cghi %r2, 1
+; CHECK-NEXT:    locghilh %r2, 0
+; CHECK-NEXT:    br %r14
+bb:
+  %cc = icmp eq i64 %b, 1
+  %conv = zext i1 %cc to i64
+  ret i64 %conv
+}