Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -7802,9 +7802,11 @@
 
   // When generating more than one CLC, all but the last will need to
   // branch to the end when a difference is found.
-  MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
-                               ? SystemZ::splitBlockAfter(MI, MBB)
-                               : nullptr);
+  MachineBasicBlock *EndMBB =
+    (Opcode == SystemZ::CLC &&
+     (ImmLength > 256 || LenMinus1Reg != SystemZ::NoRegister)
+     ? SystemZ::splitBlockAfter(MI, MBB)
+     : nullptr);
 
   // Check for the loop form, in which operand 5 is the trip count.
   if (MI.getNumExplicitOperands() > 5) {
@@ -7846,8 +7848,8 @@
       AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
       StartMBB = SystemZ::emitBlockAfter(MBB);
       LoopMBB = SystemZ::emitBlockAfter(StartMBB);
-      NextMBB = LoopMBB;
-      DoneMBB = SystemZ::emitBlockAfter(LoopMBB);
+      NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
+      DoneMBB = SystemZ::emitBlockAfter(NextMBB);
 
       // MBB:
       //   # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
@@ -7966,19 +7968,24 @@
         : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
       BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
         .addReg(StartDestReg).addMBB(StartMBB)
-        .addReg(NextDestReg).addMBB(LoopMBB);
+        .addReg(NextDestReg).addMBB(NextMBB);
       if (!HaveSingleBase)
         BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
           .addReg(StartSrcReg).addMBB(StartMBB)
-          .addReg(NextSrcReg).addMBB(LoopMBB);
+          .addReg(NextSrcReg).addMBB(NextMBB);
       MRI.constrainRegClass(LenMinus1Reg, &SystemZ::ADDR64BitRegClass);
-      BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
-        .addImm(Opcode)
-        .addReg(LenMinus1Reg)
-        .addReg(RemDestReg).addImm(DestDisp)
-        .addReg(RemSrcReg).addImm(SrcDisp);
+      MachineInstrBuilder EXRL_MIB =
+        BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
+          .addImm(Opcode)
+          .addReg(LenMinus1Reg)
+          .addReg(RemDestReg).addImm(DestDisp)
+          .addReg(RemSrcReg).addImm(SrcDisp);
       MBB->addSuccessor(AllDoneMBB);
       MBB = AllDoneMBB;
+      if (EndMBB) {
+        EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
+        MBB->addLiveIn(SystemZ::CC);
+      }
     }
   }
 
@@ -8512,6 +8519,7 @@
     return emitMemMemWrapper(MI, MBB, SystemZ::XC);
   case SystemZ::CLCSequence:
   case SystemZ::CLCLoop:
+  case SystemZ::CLCLoopVarLen:
     return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
   case SystemZ::CLSTLoop:
     return emitStringWrapper(MI, MBB, SystemZ::CLST);
Index: llvm/lib/Target/SystemZ/SystemZInstrFormats.td
===================================================================
--- llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5365,6 +5365,10 @@
                        imm64:$length, GR64:$count256),
                   [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
                                   imm64:$length, GR64:$count256))]>;
+    def LoopVarLen : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+                                         GR64:$length, GR64:$count256),
+                            [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
+                                            GR64:$length, GR64:$count256))]>;
   }
 }
 
Index: llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -189,19 +189,40 @@
   return SRA;
 }
 
+// Build the variable-length form of the memory-to-memory node Loop, which
+// operates on Size bytes at Dst and Src. The node is handed Size - 1 (as an
+// i64) and that value shifted right by 8. CLC_LOOP produces an i32 result
+// and a chain; any other opcode produces only a chain.
+static SDValue emitMemMemVarLen(SelectionDAG &DAG, const SDLoc &DL,
+                                unsigned Loop, SDValue Chain, SDValue Dst,
+                                SDValue Src, SDValue Size) {
+  SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
+                                  DAG.getZExtOrTrunc(Size, DL, MVT::i64),
+                                  DAG.getConstant(-1, DL, MVT::i64));
+  SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
+                              DAG.getConstant(8, DL, MVT::i64));
+  SDVTList VTs = Loop == SystemZISD::CLC_LOOP
+                     ? DAG.getVTList(MVT::i32, MVT::Other)
+                     : DAG.getVTList(MVT::Other);
+  return DAG.getNode(Loop, DL, VTs, Chain, Dst, Src, LenMinus1, TripC);
+}
+
 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
     SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
     SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
     MachinePointerInfo Op2PtrInfo) const {
+  SDValue CCReg;
+  // Swap operands to invert CC == 1 vs. CC == 2 cases.
   if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
     uint64_t Bytes = CSize->getZExtValue();
     assert(Bytes > 0 && "Caller should have handled 0-size case");
-    // Swap operands to invert CC == 1 vs. CC == 2 cases.
-    SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
-    Chain = CCReg.getValue(1);
-    return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
-  }
-  return std::make_pair(SDValue(), SDValue());
+    CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
+  } else {
+    CCReg = emitMemMemVarLen(DAG, DL, SystemZISD::CLC_LOOP, Chain, Src2, Src1,
+                             Size);
+  }
+  Chain = CCReg.getValue(1);
+  return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
 }
 
 std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
Index: llvm/test/CodeGen/SystemZ/memcmp-01.ll
===================================================================
--- llvm/test/CodeGen/SystemZ/memcmp-01.ll
+++ llvm/test/CodeGen/SystemZ/memcmp-01.ll
@@ -219,3 +219,30 @@
   %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769)
   ret i32 %res
 }
+
+define i32 @f14(i8 *%src1, i8 *%src2, i64 %Len) {
+; CHECK-LABEL: f14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    aghi %r4, -1
+; CHECK-NEXT:    cghi %r4, -1
+; CHECK-NEXT:    je .LBB13_5
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    srlg %r0, %r4, 8
+; CHECK-NEXT:    cgije %r0, 0, .LBB13_4
+; CHECK-NEXT:  .LBB13_2: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    clc 0(256,%r3), 0(%r2)
+; CHECK-NEXT:    jlh .LBB13_5
+; CHECK-NEXT:  # %bb.3: # in Loop: Header=BB13_2 Depth=1
+; CHECK-NEXT:    la %r3, 256(%r3)
+; CHECK-NEXT:    la %r2, 256(%r2)
+; CHECK-NEXT:    brctg %r0, .LBB13_2
+; CHECK-NEXT:  .LBB13_4:
+; CHECK-NEXT:    exrl %r4, .Ltmp0
+; CHECK-NEXT:  .LBB13_5:
+; CHECK-NEXT:    ipm %r2
+; CHECK-NEXT:    sll %r2, 2
+; CHECK-NEXT:    sra %r2, 30
+; CHECK-NEXT:    br %r14
+  %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 %Len)
+  ret i32 %res
+}