Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -79,6 +79,9 @@
   // Operand 3 is the flag operand.
   SELECT_CCMASK,
 
+  // Implements a 1/0/-1 integer result based on CC.
+  SELECT_CMP,
+
   // Evaluates to the gap between the stack pointer and the
   // base of the dynamically-allocatable area.
   ADJDYNALLOC,
@@ -565,6 +568,8 @@
 
   // Implement EmitInstrWithCustomInserter for individual operation types.
   MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
+  MachineBasicBlock *emitSelectCmp(MachineInstr &MI,
+                                   MachineBasicBlock *BB) const;
   MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned StoreOpcode, unsigned STOCOpcode,
                                    bool Invert) const;
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -4590,6 +4590,7 @@
     OPCODE(TM);
     OPCODE(BR_CCMASK);
     OPCODE(SELECT_CCMASK);
+    OPCODE(SELECT_CMP);
     OPCODE(ADJDYNALLOC);
     OPCODE(EXTRACT_ACCESS);
     OPCODE(POPCNT);
@@ -5177,6 +5178,41 @@
   return Reg;
 }
 
+// Implement EmitInstrWithCustomInserter for the SelectCmp32 pseudo MI,
+// which turns the current CC value into an integer.  When the subtarget
+// has load/store-on-condition 2 the pseudo is left untouched (it is
+// expanded later, in SystemZInstrInfo::expandPostRAPseudo).  Otherwise
+// it is replaced by the IPM-based sequence, which yields 0 if CC == 0,
+// less than zero if CC == 1 and greater than zero if CC >= 2.
+MachineBasicBlock *
+SystemZTargetLowering::emitSelectCmp(MachineInstr &MI,
+                                     MachineBasicBlock *MBB) const {
+  if (Subtarget.hasLoadStoreOnCond2())
+    return MBB;
+
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  DebugLoc DL = MI.getDebugLoc();
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned IpmReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+  unsigned SrlReg = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
+  // IPM puts CC into bits 29 and 28 of IpmReg and clears bits 30 and 31.
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::IPM))
+    .addReg(IpmReg, RegState::Define);
+  // Shift right by IPM_CC, then rotate left by 31.  Both amounts are
+  // immediates, so no extra register has to be materialised for them.
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRL))
+    .addReg(SrlReg, RegState::Define).addReg(IpmReg).addReg(0)
+    .addImm(SystemZ::IPM_CC);
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::RLL))
+    .addReg(DestReg, RegState::Define).addReg(SrlReg).addReg(0).addImm(31);
+  MI.eraseFromParent();
+  return MBB;
+}
+
 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
 MachineBasicBlock *
 SystemZTargetLowering::emitSelect(MachineInstr &MI,
@@ -5983,6 +6019,9 @@
   case SystemZ::SelectF128:
     return emitSelect(MI, MBB);
 
+  case SystemZ::SelectCmp32:
+    return emitSelectCmp(MI, MBB);
+
   case SystemZ::CondStore8Mux:
     return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
   case SystemZ::CondStore8MuxInv:
Index: lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrFormats.td
+++ lib/Target/SystemZ/SystemZInstrFormats.td
@@ -2753,6 +2753,16 @@
   let Uses = [CC];
 }
 
+// Implements 1/0/-1 integer result based on CC, for use in strcmp and memcmp
+// expansions.
+class SelectCmpWrapper
+  : Pseudo<(outs cls:$dst),
+           (ins),
+           [(set cls:$dst, (z_select_cmp))]> {
+  let usesCustomInserter = 1;
+  let Uses = [CC];
+}
+
 // Stores $new to $addr if $cc is true ("" case) or false (Inv case).
multiclass CondStores { Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -148,6 +148,7 @@ void emitGRX32Move(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, unsigned LowLowOpcode, unsigned Size, bool KillSrc) const; + void expandSelectCmp(MachineInstr &MI) const; virtual void anchor(); public: Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -222,6 +222,30 @@ .addImm(32 - Size).addImm(128 + 31).addImm(Rotate); } +void SystemZInstrInfo::expandSelectCmp(MachineInstr &MI) const { + const SystemZInstrInfo *TII = + static_cast(STI.getInstrInfo()); + + MachineBasicBlock *MBB = MI.getParent(); + unsigned DestReg = MI.getOperand(0).getReg(); + DebugLoc DL = MI.getDebugLoc(); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LHI)) + .addReg(DestReg, RegState::Define).addImm(0); + BuildMI(*MBB, MI, DL, TII->get(SystemZ::LOCHI)) + .addReg(DestReg, RegState::Define).addImm(1) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_GT) + .addReg(DestReg, RegState::Implicit | RegState::Kill); + MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LOCHI)) + .addReg(DestReg, RegState::Define).addImm(-1) + .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) + .addReg(DestReg, RegState::Implicit | RegState::Kill); + if (MI.getOperand(1).isKill()) { + MachineInstr* Last = MIB; + Last->getOperand(4).setIsKill(true); + } + MI.eraseFromParent(); +} + // If MI is a simple load or store for a frame object, return the register // it loads or stores and set FrameIndex to the index of the frame object. // Return 0 otherwise. 
@@ -467,6 +491,43 @@
   MI->eraseFromParent();
 }
 
+// Compare compares SrcReg against zero.  Check whether SrcReg is defined by
+// a SelectCmp32 pseudo (possibly through an LGFR of its result) in the same
+// block as Compare, with no instruction in between that modifies CC.  If so,
+// delete Compare, hand the LGFR (if any) and the SelectCmp32 to eraseIfDead,
+// and return true.  Otherwise change nothing and return false.
+static bool removeSelectCmpCompare(MachineInstr &Compare, unsigned SrcReg,
+                                   const MachineRegisterInfo *MRI,
+                                   const TargetRegisterInfo *TRI) {
+  MachineInstr *LGFR = nullptr;
+  MachineInstr *SELCMP = getDef(SrcReg, MRI);
+  if (SELCMP && SELCMP->getOpcode() == SystemZ::LGFR) {
+    LGFR = SELCMP;
+    SELCMP = getDef(LGFR->getOperand(1).getReg(), MRI);
+  }
+
+  // Bail out if there is no defining instruction or it is not SelectCmp32.
+  if (!SELCMP || SELCMP->getOpcode() != SystemZ::SelectCmp32)
+    return false;
+
+  // Check that there are no assignments to CC between SelectCmp and Compare
+  if (SELCMP->getParent() != Compare.getParent())
+    return false;
+  MachineBasicBlock::iterator MBBI = SELCMP, MBBE = Compare.getIterator();
+  for (++MBBI; MBBI != MBBE; ++MBBI) {
+    MachineInstr &MI = *MBBI;
+    if (MI.modifiesRegister(SystemZ::CC, TRI))
+      return false;
+  }
+
+  Compare.eraseFromParent();
+  if (LGFR)
+    eraseIfDead(LGFR, MRI);
+  eraseIfDead(SELCMP, MRI);
+
+  return true;
+}
+
 // Compare compares SrcReg against zero.  Check whether SrcReg contains
 // the result of an IPM sequence whose input CC survives until Compare,
 // and whether Compare is therefore redundant.
Delete it and return @@ -516,6 +571,11 @@ int Value, const MachineRegisterInfo *MRI) const { assert(!SrcReg2 && "Only optimizing constant comparisons so far"); bool IsLogical = (Compare.getDesc().TSFlags & SystemZII::IsLogical) != 0; + if (Value == 0 && + !IsLogical && + removeSelectCmpCompare(Compare, SrcReg, MRI, &RI)) { + return true; + } return Value == 0 && !IsLogical && removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } @@ -1033,6 +1093,10 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { switch (MI.getOpcode()) { + case SystemZ::SelectCmp32: + expandSelectCmp(MI); + return true; + case SystemZ::L128: splitMove(MI, SystemZ::LG); return true; Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -346,6 +346,8 @@ def Select32 : SelectWrapper; def Select64 : SelectWrapper; +def SelectCmp32 : SelectCmpWrapper; + // We don't define 32-bit Mux stores because the low-only STOC should // always be used if possible. defm CondStore8Mux : CondStores; def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask, [SDNPInGlue]>; +def z_select_cmp : SDNode<"SystemZISD::SELECT_CMP", SDT_ZI32Intrinsic, + [SDNPInGlue]>; def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", SDT_ZExtractAccess>; Index: lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -163,20 +163,6 @@ DAG.getConstant(Size, DL, PtrVT)); } -// Convert the current CC value into an integer that is 0 if CC == 0, -// less than zero if CC == 1 and greater than zero if CC >= 2. -// The sequence starts with IPM, which puts CC into bits 29 and 28 -// of an integer and clears bits 30 and 31. 
-static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue, - SelectionDAG &DAG) { - SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); - SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, - DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); - SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, - DAG.getConstant(31, DL, MVT::i32)); - return ROTL; -} - std::pair SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, @@ -186,7 +172,8 @@ assert(Bytes > 0 && "Caller should have handled 0-size case"); Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); SDValue Glue = Chain.getValue(1); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + SDValue SelCmp = DAG.getNode(SystemZISD::SELECT_CMP, DL, MVT::i32, Glue); + return std::make_pair(SelCmp, Chain); } return std::make_pair(SDValue(), SDValue()); } @@ -237,7 +224,8 @@ DAG.getConstant(0, DL, MVT::i32)); Chain = Unused.getValue(1); SDValue Glue = Chain.getValue(2); - return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); + SDValue SelCmp = DAG.getNode(SystemZISD::SELECT_CMP, DL, MVT::i32, Glue); + return std::make_pair(SelCmp, Chain); } // Search from Src for a null character, stopping once Src reaches Limit. Index: test/CodeGen/SystemZ/z13strcmp-01.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/z13strcmp-01.ll @@ -0,0 +1,70 @@ +; Test strcmp using CLST, i32 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare signext i32 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. 
+define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: lhi [[REG:%r[0-5]]], 0 +; CHECK: lochih [[REG]], 1 +; CHECK: lochil [[REG]], -1 +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. +define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: lhi [[REG:%r[0-5]]], 0 +; CHECK: lochih [[REG]], 1 +; CHECK: lochil [[REG]], -1 +; CHECK: blr %r14 +; CHECK: br %r14 +entry: + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} Index: test/CodeGen/SystemZ/z13strcmp-02.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/z13strcmp-02.ll @@ -0,0 +1,72 @@ +; Test strcmp using CLST, i64 version. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +declare i64 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. 
+define i64 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: lhi [[REG:%r[0-5]]], 0 +; CHECK: lochih [[REG]], 1 +; CHECK: lochil [[REG]], -1 +; CHECK: lgfr %r2, [[REG]] +; CHECK: br %r14 + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + ret i64 %res +} + +; Check a case where the result is tested for equality. +define void @f2(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f2: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: ber %r14 +; CHECK: br %r14 + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp eq i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. +define i64 @f3(i8 *%src1, i8 *%src2, i64 *%dest) { +; CHECK-LABEL: f3: +; CHECK: lhi %r0, 0 +; CHECK: [[LABEL:\.[^:]*]]: +; CHECK: clst %r2, %r3 +; CHECK-NEXT: jo [[LABEL]] +; CHECK-NEXT: BB#{{[0-9]+}} +; CHECK-NEXT: lhi [[REG:%r[0-5]]], 0 +; CHECK: lochih [[REG]], 1 +; CHECK: lochil [[REG]], -1 +; CHECK: lgfr %r2, [[REG]] +; CHECK: blr %r14 +; CHECK: br %r14 +entry: + %res = call i64 @strcmp(i8 *%src1, i8 *%src2) + %cmp = icmp slt i64 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i64 0, i64 *%dest + br label %exit + +exit: + ret i64 %res +}