Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.h +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.h @@ -312,6 +312,8 @@ SystemZII::FusedCompareType Type, const MachineInstr *MI = nullptr) const; + bool trySwapCompareOperands(MachineBasicBlock::iterator MBBI) const; + // If Opcode is a LOAD opcode for with an associated LOAD AND TRAP // operation exists, returh the opcode for the latter, otherwise return 0. unsigned getLoadAndTrap(unsigned Opcode) const; Index: llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1155,14 +1155,23 @@ // commutable, try to change R into . unsigned NumOps = MI.getNumExplicitOperands(); int MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode == -1) + return nullptr; + + // Try to swap compare operands if possible. + // TODO: CEB / CDB ? + if ((MI.getOpcode() == SystemZ::CR || MI.getOpcode() == SystemZ::CGR || + MI.getOpcode() == SystemZ::CLR || MI.getOpcode() == SystemZ::CLGR) && + OpNum == 0 && trySwapCompareOperands(MI)) + OpNum = 1; // See if this is a 3-address instruction that is convertible to 2-address // and suitable for folding below. Only try this with virtual registers // and a provided VRM (during regalloc). 
bool NeedsCommute = false; - if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) { + if (SystemZ::getTwoOperandOpcode(Opcode) != -1) { if (VRM == nullptr) - MemOpcode = -1; + return nullptr; else { assert(NumOps == 3 && "Expected two source registers."); Register DstReg = MI.getOperand(0).getReg(); @@ -1177,31 +1186,29 @@ DstPhys == VRM->getPhys(SrcReg)) NeedsCommute = (OpNum == 1); else - MemOpcode = -1; + return nullptr; } } - if (MemOpcode >= 0) { - if ((OpNum == NumOps - 1) || NeedsCommute) { - const MCInstrDesc &MemDesc = get(MemOpcode); - uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); - assert(AccessBytes != 0 && "Size of access should be known"); - assert(AccessBytes <= Size && "Access outside the frame index"); - uint64_t Offset = Size - AccessBytes; - MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, - MI.getDebugLoc(), get(MemOpcode)); - MIB.add(MI.getOperand(0)); - if (NeedsCommute) - MIB.add(MI.getOperand(2)); - else - for (unsigned I = 1; I < OpNum; ++I) - MIB.add(MI.getOperand(I)); - MIB.addFrameIndex(FrameIndex).addImm(Offset); - if (MemDesc.TSFlags & SystemZII::HasIndex) - MIB.addReg(0); - transferDeadCC(&MI, MIB); - return MIB; - } + if ((OpNum == NumOps - 1) || NeedsCommute) { + const MCInstrDesc &MemDesc = get(MemOpcode); + uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); + assert(AccessBytes != 0 && "Size of access should be known"); + assert(AccessBytes <= Size && "Access outside the frame index"); + uint64_t Offset = Size - AccessBytes; + MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, + MI.getDebugLoc(), get(MemOpcode)); + MIB.add(MI.getOperand(0)); + if (NeedsCommute) + MIB.add(MI.getOperand(2)); + else + for (unsigned I = 1; I < OpNum; ++I) + MIB.add(MI.getOperand(I)); + MIB.addFrameIndex(FrameIndex).addImm(Offset); + if (MemDesc.TSFlags & SystemZII::HasIndex) + MIB.addReg(0); + transferDeadCC(&MI, MIB); + return MIB; } return nullptr; @@ -1710,6 
+1717,82 @@
   return 0;
 }
 
+// Try to swap the two register operands of the reg/reg compare at MBBI in
+// place, adjusting the CC mask of every instruction that reads the CC value
+// set by the compare. Returns true if the operands were swapped. Returns
+// false, leaving everything unmodified, if a reader of CC has no CC mask
+// operand that could be adjusted or if CC may be live out of the block.
+bool SystemZInstrInfo::
+trySwapCompareOperands(MachineBasicBlock::iterator const MBBI) const {
+  assert(MBBI->isCompare() && MBBI->getOperand(0).isReg() &&
+         MBBI->getOperand(1).isReg() && !MBBI->mayLoad() &&
+         "Not a compare reg/reg.");
+
+  // Collect the readers of CC up to the next instruction that redefines it.
+  MachineBasicBlock *MBB = MBBI->getParent();
+  bool CCRedefined = false;
+  SmallVector<MachineInstr *, 4> CCUsers;
+  for (MachineBasicBlock::iterator Itr = std::next(MBBI);
+       Itr != MBB->end(); ++Itr) {
+    if (Itr->readsRegister(SystemZ::CC)) {
+      unsigned Flags = Itr->getDesc().TSFlags;
+      // Only a reader that carries a CC mask operand can be rewritten.
+      if ((Flags & SystemZII::CCMaskFirst) || (Flags & SystemZII::CCMaskLast))
+        CCUsers.push_back(&*Itr);
+      else
+        return false;
+    }
+    if (Itr->definesRegister(SystemZ::CC)) {
+      CCRedefined = true;
+      break;
+    }
+  }
+
+  // The live-outs only matter if the value set by this compare can reach the
+  // end of the block, i.e. if no later instruction in the block redefines CC.
+  if (!CCRedefined) {
+    LivePhysRegs LiveRegs(*MBB->getParent()->getSubtarget().getRegisterInfo());
+    LiveRegs.addLiveOuts(*MBB);
+    if (LiveRegs.contains(SystemZ::CC))
+      return false;
+  }
+
+  // Update all CC users. With the operands exchanged, "less than" and
+  // "greater than" trade places while all other mask bits keep their meaning,
+  // so LT <-> GT, LE <-> GE, and EQ / NE stay as they are. This assumes that
+  // CCMASK_CMP_LT and CCMASK_CMP_GT are distinct single-bit masks.
+  for (MachineInstr *User : CCUsers) {
+    unsigned Flags = User->getDesc().TSFlags;
+    unsigned FirstOpNum = ((Flags & SystemZII::CCMaskFirst) ?
+                           0 : User->getNumExplicitOperands() - 2);
+    MachineOperand &CCMaskMO = User->getOperand(FirstOpNum + 1);
+    unsigned CCMask = CCMaskMO.getImm();
+    unsigned NewCCMask =
+        CCMask & ~(SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_GT);
+    if (CCMask & SystemZ::CCMASK_CMP_LT)
+      NewCCMask |= SystemZ::CCMASK_CMP_GT;
+    if (CCMask & SystemZ::CCMASK_CMP_GT)
+      NewCCMask |= SystemZ::CCMASK_CMP_LT;
+    CCMaskMO.setImm(NewCCMask);
+  }
+
+  // Swap the registers and flags of the compare operands. MBBI is expected
+  // to remain without constructing a new one.
+  MachineOperand &LHS = MBBI->getOperand(0);
+  MachineOperand &RHS = MBBI->getOperand(1);
+  const MachineOperand Tmp(LHS);
+  LHS.setReg(RHS.getReg());
+  LHS.setSubReg(RHS.getSubReg());
+  LHS.setIsKill(RHS.isKill());
+  LHS.setIsUndef(RHS.isUndef());
+  RHS.setReg(Tmp.getReg());
+  RHS.setSubReg(Tmp.getSubReg());
+  RHS.setIsKill(Tmp.isKill());
+  RHS.setIsUndef(Tmp.isUndef());
+
+  return true;
+}
+
 unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
   if (!STI.hasLoadAndTrap())
     return 0;
Index: llvm/test/CodeGen/SystemZ/int-cmp-56.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/int-cmp-56.mir
@@ -0,0 +1,323 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z14 -run-pass greedy %s -o - \
+# RUN:   | FileCheck %s
+#
+# Test that a reload can be folded into a compare instruction after swapping
+# operands (when the LHS register is spilled).
+
+--- |
+  declare i64 @foo()
+  define i64 @fun1(i64* %ptr0) { ret i64 0 }
+  define i64 @fun2(i64* %ptr0) { ret i64 0 }
+
+  declare i32 @foo32()
+  define i32 @fun3(i32* %ptr0) { ret i32 0 }
+  define i32 @fun4(i32* %ptr0) { ret i32 0 }
+...
+ + +# Test CGR -> CG +# CHECK: name: fun1 +# CHECK: CG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0) +# CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit killed $cc +--- +name: fun1 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: addr64bit } + - { id: 1, class: gr64bit } + - { id: 2, class: gr64bit } + - { id: 3, class: gr64bit } + - { id: 4, class: gr64bit } + - { id: 5, class: gr64bit } + - { id: 6, class: gr64bit } + - { id: 7, class: gr64bit } + - { id: 8, class: gr64bit } + - { id: 9, class: gr64bit } + - { id: 10, class: gr64bit } + - { id: 11, class: gr64bit } + - { id: 12, class: gr64bit } + - { id: 13, class: gr64bit } + - { id: 14, class: gr64bit } + - { id: 15, class: gr64bit } + - { id: 16, class: gr64bit } + - { id: 17, class: gr64bit } + - { id: 18, class: gr64bit } + - { id: 19, class: gr64bit } +liveins: + - { reg: '$r2d', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 + hasCalls: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d + + %0:addr64bit = COPY $r2d + %1:gr64bit = LG %0, 0, $noreg + %2:gr64bit = LG %0, 16, $noreg + %3:gr64bit = LG %0, 32, $noreg + %4:gr64bit = LG %0, 48, $noreg + %5:gr64bit = LG %0, 64, $noreg + %6:gr64bit = LG %0, 80, $noreg + %7:gr64bit = LG %0, 96, $noreg + %8:gr64bit = LG %0, 112, $noreg + %9:gr64bit = LG %0, 128, $noreg + ADJCALLSTACKDOWN 0, 0 + CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d + %10:gr64bit = COPY $r2d + ADJCALLSTACKUP 0, 0 + CGR %10, %1, implicit-def $cc + %12:gr64bit = COPY %10 + %12:gr64bit = LOCGHI %12, 0, 14, 10, implicit killed $cc + CGR %10, %2, implicit-def $cc + %12:gr64bit = LOCGHI %12, 1, 14, 10, implicit killed $cc + CGR %10, %3, implicit-def $cc + %12:gr64bit = LOCGHI %12, 2, 14, 10, implicit killed $cc + CGR %10, %4, implicit-def $cc + %12:gr64bit = LOCGHI %12, 3, 14, 10, implicit killed $cc + CGR %10, %5, implicit-def $cc + %12:gr64bit = LOCGHI %12, 4, 14, 
10, implicit killed $cc + CGR %10, %6, implicit-def $cc + %12:gr64bit = LOCGHI %12, 5, 14, 10, implicit killed $cc + CGR %10, %7, implicit-def $cc + %12:gr64bit = LOCGHI %12, 6, 14, 10, implicit killed $cc + CGR %10, %8, implicit-def $cc + %12:gr64bit = LOCGHI %12, 7, 14, 10, implicit killed $cc + CGR %9, %10, implicit-def $cc + %12:gr64bit = LOCGHI %12, 8, 14, 10, implicit killed $cc + $r2d = COPY %12 + Return implicit $r2d +... + + +# Test CLGR -> CLG +# CHECK: name: fun2 +# CHECK: CLG %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 8 from %stack.0) +# CHECK-NEXT: %12:gr64bit = LOCGHI %12, 8, 14, 12, implicit killed $cc +--- +name: fun2 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: addr64bit } + - { id: 1, class: gr64bit } + - { id: 2, class: gr64bit } + - { id: 3, class: gr64bit } + - { id: 4, class: gr64bit } + - { id: 5, class: gr64bit } + - { id: 6, class: gr64bit } + - { id: 7, class: gr64bit } + - { id: 8, class: gr64bit } + - { id: 9, class: gr64bit } + - { id: 10, class: gr64bit } + - { id: 11, class: gr64bit } + - { id: 12, class: gr64bit } + - { id: 13, class: gr64bit } + - { id: 14, class: gr64bit } + - { id: 15, class: gr64bit } + - { id: 16, class: gr64bit } + - { id: 17, class: gr64bit } + - { id: 18, class: gr64bit } + - { id: 19, class: gr64bit } +liveins: + - { reg: '$r2d', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 + hasCalls: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d + + %0:addr64bit = COPY $r2d + %1:gr64bit = LG %0, 0, $noreg + %2:gr64bit = LG %0, 16, $noreg + %3:gr64bit = LG %0, 32, $noreg + %4:gr64bit = LG %0, 48, $noreg + %5:gr64bit = LG %0, 64, $noreg + %6:gr64bit = LG %0, 80, $noreg + %7:gr64bit = LG %0, 96, $noreg + %8:gr64bit = LG %0, 112, $noreg + %9:gr64bit = LG %0, 128, $noreg + ADJCALLSTACKDOWN 0, 0 + CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d + %10:gr64bit = COPY $r2d + ADJCALLSTACKUP 0, 0 + CGR %10, %1, 
implicit-def $cc + %12:gr64bit = COPY %10 + %12:gr64bit = LOCGHI %12, 0, 14, 10, implicit killed $cc + CGR %10, %2, implicit-def $cc + %12:gr64bit = LOCGHI %12, 1, 14, 10, implicit killed $cc + CGR %10, %3, implicit-def $cc + %12:gr64bit = LOCGHI %12, 2, 14, 10, implicit killed $cc + CGR %10, %4, implicit-def $cc + %12:gr64bit = LOCGHI %12, 3, 14, 10, implicit killed $cc + CGR %10, %5, implicit-def $cc + %12:gr64bit = LOCGHI %12, 4, 14, 10, implicit killed $cc + CGR %10, %6, implicit-def $cc + %12:gr64bit = LOCGHI %12, 5, 14, 10, implicit killed $cc + CGR %10, %7, implicit-def $cc + %12:gr64bit = LOCGHI %12, 6, 14, 10, implicit killed $cc + CGR %10, %8, implicit-def $cc + %12:gr64bit = LOCGHI %12, 7, 14, 10, implicit killed $cc + CLGR %9, %10, implicit-def $cc + %12:gr64bit = LOCGHI %12, 8, 14, 10, implicit killed $cc + $r2d = COPY %12 + Return implicit $r2d +... + + +# Test CR -> C +# CHECK: name: fun3 +# CHECK: C %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0) +# CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit killed $cc +--- +name: fun3 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: addr64bit } + - { id: 1, class: gr32bit } + - { id: 2, class: gr32bit } + - { id: 3, class: gr32bit } + - { id: 4, class: gr32bit } + - { id: 5, class: gr32bit } + - { id: 6, class: gr32bit } + - { id: 7, class: gr32bit } + - { id: 8, class: gr32bit } + - { id: 9, class: gr32bit } + - { id: 10, class: gr32bit } + - { id: 11, class: gr32bit } + - { id: 12, class: gr32bit } + - { id: 13, class: gr32bit } + - { id: 14, class: gr32bit } + - { id: 15, class: gr32bit } + - { id: 16, class: gr32bit } + - { id: 17, class: gr32bit } + - { id: 18, class: gr32bit } + - { id: 19, class: gr32bit } +liveins: + - { reg: '$r2d', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 + hasCalls: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d + + %0:addr64bit = COPY $r2d + %1:gr32bit = LMux %0, 0, $noreg + %2:gr32bit = LMux %0, 8, $noreg 
+ %3:gr32bit = LMux %0, 16, $noreg + %4:gr32bit = LMux %0, 24, $noreg + %5:gr32bit = LMux %0, 32, $noreg + %6:gr32bit = LMux %0, 40, $noreg + %7:gr32bit = LMux %0, 48, $noreg + %8:gr32bit = LMux %0, 56, $noreg + %9:gr32bit = LMux %0, 64, $noreg + ADJCALLSTACKDOWN 0, 0 + CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l + %10:gr32bit = COPY $r2l + ADJCALLSTACKUP 0, 0 + CR %10, %1, implicit-def $cc + %12:gr32bit = COPY %10 + %12:gr32bit = LOCHIMux %12, 0, 14, 10, implicit killed $cc + CR %10, %2, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 1, 14, 10, implicit killed $cc + CR %10, %3, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 2, 14, 10, implicit killed $cc + CR %10, %4, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 3, 14, 10, implicit killed $cc + CR %10, %5, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 4, 14, 10, implicit killed $cc + CR %10, %6, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 5, 14, 10, implicit killed $cc + CR %10, %7, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 6, 14, 10, implicit killed $cc + CR %10, %8, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 7, 14, 10, implicit killed $cc + CR %9, %10, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 8, 14, 10, implicit killed $cc + $r2l = COPY %12 + Return implicit $r2l +... 
+ + +# Test CLR -> CL +# CHECK: name: fun4 +# CHECK: CL %10, %stack.0, 0, $noreg, implicit-def $cc :: (load 4 from %stack.0) +# CHECK: %12:gr32bit = LOCHIMux %12, 8, 14, 12, implicit killed $cc +--- +name: fun4 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: addr64bit } + - { id: 1, class: gr32bit } + - { id: 2, class: gr32bit } + - { id: 3, class: gr32bit } + - { id: 4, class: gr32bit } + - { id: 5, class: gr32bit } + - { id: 6, class: gr32bit } + - { id: 7, class: gr32bit } + - { id: 8, class: gr32bit } + - { id: 9, class: gr32bit } + - { id: 10, class: gr32bit } + - { id: 11, class: gr32bit } + - { id: 12, class: gr32bit } + - { id: 13, class: gr32bit } + - { id: 14, class: gr32bit } + - { id: 15, class: gr32bit } + - { id: 16, class: gr32bit } + - { id: 17, class: gr32bit } + - { id: 18, class: gr32bit } + - { id: 19, class: gr32bit } +liveins: + - { reg: '$r2d', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 + hasCalls: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d + + %0:addr64bit = COPY $r2d + %1:gr32bit = LMux %0, 0, $noreg + %2:gr32bit = LMux %0, 8, $noreg + %3:gr32bit = LMux %0, 16, $noreg + %4:gr32bit = LMux %0, 24, $noreg + %5:gr32bit = LMux %0, 32, $noreg + %6:gr32bit = LMux %0, 40, $noreg + %7:gr32bit = LMux %0, 48, $noreg + %8:gr32bit = LMux %0, 56, $noreg + %9:gr32bit = LMux %0, 64, $noreg + ADJCALLSTACKDOWN 0, 0 + CallBRASL @foo, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2l + %10:gr32bit = COPY $r2l + ADJCALLSTACKUP 0, 0 + CR %10, %1, implicit-def $cc + %12:gr32bit = COPY %10 + %12:gr32bit = LOCHIMux %12, 0, 14, 10, implicit killed $cc + CR %10, %2, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 1, 14, 10, implicit killed $cc + CR %10, %3, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 2, 14, 10, implicit killed $cc + CR %10, %4, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 3, 14, 10, implicit killed $cc + CR %10, %5, implicit-def $cc + %12:gr32bit = 
LOCHIMux %12, 4, 14, 10, implicit killed $cc + CR %10, %6, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 5, 14, 10, implicit killed $cc + CR %10, %7, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 6, 14, 10, implicit killed $cc + CR %10, %8, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 7, 14, 10, implicit killed $cc + CLR %9, %10, implicit-def $cc + %12:gr32bit = LOCHIMux %12, 8, 14, 10, implicit killed $cc + $r2l = COPY %12 + Return implicit $r2l +...