Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -79,6 +79,9 @@
   // Operand 3 is the flag operand.
   SELECT_CCMASK,
 
+  // Implements a 1/0/-1 integer result based on CC.
+  SELECT_CMP,
+
   // Evaluates to the gap between the stack pointer and the
   // base of the dynamically-allocatable area.
   ADJDYNALLOC,
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4613,6 +4613,7 @@
     OPCODE(TM);
     OPCODE(BR_CCMASK);
     OPCODE(SELECT_CCMASK);
+    OPCODE(SELECT_CMP);
     OPCODE(ADJDYNALLOC);
     OPCODE(EXTRACT_ACCESS);
     OPCODE(POPCNT);
Index: lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrFormats.td
+++ lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2753,6 +2753,15 @@
   let Uses = [CC];
 }
 
+// Pseudo with no inputs other than CC that produces an integer in $dst via
+// the z_select_cmp node, for use in strcmp and memcmp expansions.
+class SelectCmpWrapper<RegisterOperand cls>
+  : Pseudo<(outs cls:$dst),
+           (ins),
+           [(set cls:$dst, (z_select_cmp))]> {
+  let Uses = [CC];
+}
+
 // Stores $new to $addr if $cc is true ("" case) or false (Inv case).
multiclass CondStores {
Index: lib/Target/SystemZ/SystemZInstrInfo.h
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.h
+++ lib/Target/SystemZ/SystemZInstrInfo.h
@@ -148,6 +148,7 @@
   void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                      unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
+  void expandSelectCmp(MachineInstr &MI) const;
   virtual void anchor();
 
 public:
Index: lib/Target/SystemZ/SystemZInstrInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -222,6 +222,38 @@
     .addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
 }
 
+// Expand the SelectCmp pseudo MI in place: emit IPM into the destination
+// register, shift it left by 2 and then arithmetic-shift it right by 30, so
+// that the two CC bits end up sign-extended across the register.  The three
+// instructions are inserted before MI, and MI itself is erased afterwards.
+//
+// NOTE(review): with IPM leaving CC in bits 29 and 28, this sequence yields
+// 0, 1, -2, -1 for CC 0..3, i.e. a positive value for CC == 1.  The
+// addIPMSequence helper it replaces documented "less than zero if CC == 1";
+// confirm the sign convention expected by the memcmp/strcmp callers.
+void SystemZInstrInfo::expandSelectCmp(MachineInstr &MI) const {
+  const SystemZInstrInfo *TII =
+    static_cast<const SystemZInstrInfo *>(STI.getInstrInfo());
+
+  MachineBasicBlock *MBB = MI.getParent();
+  unsigned DestReg = MI.getOperand(0).getReg();
+  DebugLoc DL = MI.getDebugLoc();
+
+  // Read the current CC into DestReg.
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::IPM))
+    .addReg(DestReg, RegState::Define);
+  // Move the two CC bits up to bits 31 and 30 ...
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::SLL))
+    .addReg(DestReg, RegState::Define).addReg(DestReg, RegState::Kill)
+    .addReg(0).addImm(2);
+  // ... and sign-extend them back down into the low bits.
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRA))
+    .addReg(DestReg, RegState::Define).addReg(DestReg, RegState::Kill)
+    .addReg(0).addImm(30);
+
+  MI.eraseFromParent();
+}
+
 // If MI is a simple load or store for a frame object, return the register
 // it loads or stores and set FrameIndex to the index of the frame object.
 // Return 0 otherwise.
@@ -467,6 +487,47 @@
   MI->eraseFromParent();
 }
 
+// Compare compares SrcReg against zero.  Check whether SrcReg is defined by
+// SelectCmp32, either directly or through an LGFR, in the same block as
+// Compare with no instruction in between that modifies CC.  If so, erase
+// Compare, pass the LGFR (if any) and the SelectCmp32 to eraseIfDead, and
+// return true.  Return false without changing anything otherwise.
+//
+// NOTE(review): removing Compare is only sound if the sign of the value
+// produced for SelectCmp32 agrees with the CC it was derived from; confirm
+// against expandSelectCmp.
+static bool removeSelectCmpCompare(MachineInstr &Compare, unsigned SrcReg,
+                                   const MachineRegisterInfo *MRI,
+                                   const TargetRegisterInfo *TRI) {
+  MachineInstr *LGFR = nullptr;
+  MachineInstr *SELCMP = getDef(SrcReg, MRI);
+  if (SELCMP && SELCMP->getOpcode() == SystemZ::LGFR) {
+    LGFR = SELCMP;
+    SELCMP = getDef(LGFR->getOperand(1).getReg(), MRI);
+  }
+
+  // Bail out if no defining instruction was found or it is not SelectCmp32.
+  if (!SELCMP || SELCMP->getOpcode() != SystemZ::SelectCmp32)
+    return false;
+
+  // Check that there are no assignments to CC between SelectCmp and Compare
+  if (SELCMP->getParent() != Compare.getParent())
+    return false;
+  MachineBasicBlock::iterator MBBI = SELCMP, MBBE = Compare.getIterator();
+  for (++MBBI; MBBI != MBBE; ++MBBI) {
+    MachineInstr &MI = *MBBI;
+    if (MI.modifiesRegister(SystemZ::CC, TRI))
+      return false;
+  }
+
+  Compare.eraseFromParent();
+  if (LGFR)
+    eraseIfDead(LGFR, MRI);
+  eraseIfDead(SELCMP, MRI);
+
+  return true;
+}
+
 // Compare compares SrcReg against zero. Check whether SrcReg contains
 // the result of an IPM sequence whose input CC survives until Compare,
 // and whether Compare is therefore redundant.
Delete it and return @@ -516,6 +567,11 @@ int Value, const MachineRegisterInfo *MRI) const { assert(!SrcReg2 && "Only optimizing constant comparisons so far"); bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0; + if (Value == 0 && + !IsLogical && + removeSelectCmpCompare(Compare, SrcReg, MRI, &RI)) { + return true; + } return Value == 0 && !IsLogical && removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } @@ -1033,6 +1089,10 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { switch (MI.getOpcode()) { + case SystemZ::SelectCmp32: + expandSelectCmp(MI); + return true; + case SystemZ::L128: splitMove(MI, SystemZ::LG); return true; Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -346,6 +346,8 @@ def Select32 : SelectWrapper; def Select64 : SelectWrapper; +def SelectCmp32 : SelectCmpWrapper; + // We don't define 32-bit Mux stores because the low-only STOC should // always be used if possible. defm CondStore8Mux : CondStores; def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, [SDNPInGlue]>; +def z_select_cmp : SDNode<"SystemZISD::SELECT_CMP", SDT_ZI32Intrinsic, + [SDNPInGlue]>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; Index: lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -163,20 +163,6 @@ DAG.getConstant(Size, DL, PtrVT)); } -// Convert the current CC value into an integer that is 0 if CC == 0, -// less than zero if CC == 1 and greater than zero if CC >= 2. -// The sequence starts with IPM, which puts CC into bits 29 and 28 -// of an integer and clears bits 30 and 31. 
-static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue, - SelectionDAG &DAG) { - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); - SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); - SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, - DAG.getConstant(31, DL, MVT::i32)); - return ROTL; -} - std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, @@ -186,7 +172,8 @@ assert(Bytes > 0 && "Caller should have handled 0-size case"); Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); SDValue Glue = Chain.getValue(1); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + SDValue SelCmp = DAG.getNode(SystemZISD::SELECT_CMP, DL, MVT::i32, Glue); + return std::make_pair(SelCmp, Chain); } return std::make_pair(SDValue(), SDValue()); } @@ -237,7 +224,8 @@ DAG.getConstant(0, DL, MVT::i32)); Chain = Unused.getValue(1); SDValue Glue = Chain.getValue(2); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + SDValue SelCmp = DAG.getNode(SystemZISD::SELECT_CMP, DL, MVT::i32, Glue); + return std::make_pair(SelCmp, Chain); } // Search from Src for a null character, stopping once Src reaches Limit. 
Index: test/CodeGen/SystemZ/memcmp-01.ll =================================================================== --- test/CodeGen/SystemZ/memcmp-01.ll +++ test/CodeGen/SystemZ/memcmp-01.ll @@ -18,8 +18,8 @@ ; CHECK-LABEL: f2: ; CHECK: clc 0(2,%r2), 0(%r3) ; CHECK: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: br %r14 %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2) ret i32 %res @@ -106,8 +106,8 @@ ; CHECK-LABEL: f7: ; CHECK: clc 0(256,%r2), 0(%r3) ; CHECK: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: blr %r14 ; CHECK: br %r14 entry: Index: test/CodeGen/SystemZ/memcmp-02.ll =================================================================== --- test/CodeGen/SystemZ/memcmp-02.ll +++ test/CodeGen/SystemZ/memcmp-02.ll @@ -18,8 +18,8 @@ ; CHECK-LABEL: f2: ; CHECK: clc 0(2,%r2), 0(%r3) ; CHECK: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: lgfr %r2, [[REG]] ; CHECK: br %r14 %res = call i64 @memcmp(i8 *%src1, i8 *%src2, i64 2) @@ -107,8 +107,8 @@ ; CHECK-LABEL: f7: ; CHECK: clc 0(256,%r2), 0(%r3) ; CHECK: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: lgfr %r2, [[REG]] ; CHECK: blr %r14 ; CHECK: br %r14 Index: test/CodeGen/SystemZ/strcmp-01.ll =================================================================== --- test/CodeGen/SystemZ/strcmp-01.ll +++ test/CodeGen/SystemZ/strcmp-01.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: jo [[LABEL]] ; CHECK-NEXT: BB#{{[0-9]+}} ; CHECK-NEXT: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: br %r14 %res = call i32 @strcmp(i8 *%src1, i8 *%src2) ret i32 %res @@ -52,8 +52,8 @@ ; CHECK-NEXT: jo 
[[LABEL]] ; CHECK-NEXT: BB#{{[0-9]+}} ; CHECK-NEXT: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll %r2, [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: blr %r14 ; CHECK: br %r14 entry: Index: test/CodeGen/SystemZ/strcmp-02.ll =================================================================== --- test/CodeGen/SystemZ/strcmp-02.ll +++ test/CodeGen/SystemZ/strcmp-02.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: jo [[LABEL]] ; CHECK-NEXT: BB#{{[0-9]+}} ; CHECK-NEXT: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: lgfr %r2, [[REG]] ; CHECK: br %r14 %res = call i64 @strcmp(i8 *%src1, i8 *%src2) @@ -53,8 +53,8 @@ ; CHECK-NEXT: jo [[LABEL]] ; CHECK-NEXT: BB#{{[0-9]+}} ; CHECK-NEXT: ipm [[REG:%r[0-5]]] -; CHECK: srl [[REG]], 28 -; CHECK: rll [[REG]], [[REG]], 31 +; CHECK: sll [[REG]], 2 +; CHECK: sra [[REG]], 30 ; CHECK: lgfr %r2, [[REG]] ; CHECK: blr %r14 ; CHECK: br %r14