Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/ErrorHandling.h" @@ -932,9 +933,12 @@ /// operand folded, otherwise NULL is returned. /// The new instruction is inserted before MI, and the client is responsible /// for removing the old instruction. + /// If VRM is passed, the assigned physregs can be inspected by target to + /// decide if an opcode is legal to use. MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS = nullptr) const; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. @@ -1024,7 +1028,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const { + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const { return nullptr; } Index: lib/CodeGen/InlineSpiller.cpp =================================================================== --- lib/CodeGen/InlineSpiller.cpp +++ lib/CodeGen/InlineSpiller.cpp @@ -837,7 +837,7 @@ MachineInstr *FoldMI = LoadMI ? 
TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); if (!FoldMI) return false; Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -522,7 +522,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS) const { + LiveIntervals *LIS, + VirtRegMap *VRM) const { auto Flags = MachineMemOperand::MONone; for (unsigned OpIdx : Ops) Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore @@ -568,7 +569,7 @@ MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM); } if (NewMI) { Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -162,7 +162,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// \returns true if a branch from an instruction with opcode \p BranchOpc /// bytes is capable of jumping to a position \p BrOffset bytes away. 
Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3039,7 +3039,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack. Consider this instruction: // // %0 = COPY %sp; GPR64all:%0 Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -129,13 +129,13 @@ let ValueCols = [["mem"]]; } -// Return the 3-operand form of a 2-operand instruction. -def getThreeOperandOpcode : InstrMapping { +// Return the 2-operand form of a 3-operand instruction. +def getTwoOperandOpcode : InstrMapping { let FilterClass = "InstSystemZ"; let RowFields = ["NumOpsKey"]; let ColFields = ["NumOpsValue"]; - let KeyCol = ["2"]; - let ValueCols = [["3"]]; + let KeyCol = ["3"]; + let ValueCols = [["2"]]; } //===----------------------------------------------------------------------===// @@ -3073,9 +3073,9 @@ RegisterOperand cls2> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRRFa, + def K : BinaryRRFa, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRR; } } @@ -3085,9 +3085,9 @@ RegisterOperand cls2> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRRFa, + def K : BinaryRRFa, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRRE; } } @@ -3188,9 +3188,9 @@ Immediate imm> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRIE, + def K : 
BinaryRIE, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRI; } } @@ -3265,9 +3265,9 @@ SDPatternOperator operator, RegisterOperand cls> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRSY, + def K : BinaryRSY, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRS; } } @@ -4593,9 +4593,9 @@ RegisterOperand cls, Immediate imm> { let NumOpsKey = key in { let NumOpsValue = "3" in - def K : BinaryRIEPseudo, + def K : BinaryRIEPseudo, Requires<[FeatureHighWord, FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRIPseudo, Requires<[FeatureHighWord]>; } Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -141,6 +141,10 @@ } // end namespace SystemZII +namespace SystemZ { +int getTwoOperandOpcode(uint16_t Opcode); +} + class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; SystemZSubtarget &STI; @@ -248,7 +252,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -957,73 +957,13 @@ } } -// Used to return from convertToThreeAddress after replacing two-address -// instruction OldMI with 
three-address instruction NewMI. -static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, - MachineInstr *NewMI, - LiveVariables *LV) { - if (LV) { - unsigned NumOps = OldMI->getNumOperands(); - for (unsigned I = 1; I < NumOps; ++I) { - MachineOperand &Op = OldMI->getOperand(I); - if (Op.isReg() && Op.isKill()) - LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI); - } - } - transferDeadCC(OldMI, NewMI); - return NewMI; -} - MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { MachineBasicBlock *MBB = MI.getParent(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - - // Try to convert something like SLL into SLLK, if supported. - // We prefer to keep the two-operand form where possible both - // because it tends to be shorter and because some instructions - // have memory forms that can be used during spilling. - if (STI.hasDistinctOps()) { - MachineOperand &Dest = MI.getOperand(0); - MachineOperand &Src = MI.getOperand(1); - unsigned DestReg = Dest.getReg(); - unsigned SrcReg = Src.getReg(); - // AHIMux is only really a three-operand instruction when both operands - // are low registers. Try to constrain both operands to be low if - // possible. - if (Opcode == SystemZ::AHIMux && - TargetRegisterInfo::isVirtualRegister(DestReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg) && - MRI.getRegClass(DestReg)->contains(SystemZ::R1L) && - MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) { - MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass); - MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass); - } - int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); - if (ThreeOperandOpcode >= 0) { - // Create three address instruction without adding the implicit - // operands. 
Those will instead be copied over from the original - // instruction by the loop below. - MachineInstrBuilder MIB( - *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(), - /*NoImplicit=*/true)); - MIB.add(Dest); - // Keep the kill state, but drop the tied flag. - MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); - // Keep the remaining operands as-is. - for (unsigned I = 2; I < NumOps; ++I) - MIB.add(MI.getOperand(I)); - MBB->insert(MI, MIB); - return finishConvertToThreeAddress(&MI, MIB, LV); - } - } // Try to convert an AND into an RISBG-type instruction. - if (LogicOp And = interpretAndImmediate(Opcode)) { + // TODO: It might be beneficial to select RISBG and shorten to AND instead. + if (LogicOp And = interpretAndImmediate(MI.getOpcode())) { uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB; // AND IMMEDIATE leaves the other bits of the register unchanged. Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); @@ -1051,7 +991,16 @@ .addImm(Start) .addImm(End + 128) .addImm(0); - return finishConvertToThreeAddress(&MI, MIB, LV); + if (LV) { + unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = MI.getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), MI, *MIB); + } + } + transferDeadCC(&MI, MIB); + return MIB; } } return nullptr; @@ -1060,7 +1009,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); @@ -1214,6 +1163,21 @@ } } + // See if a 3-address instruction is convertible to 2-address and therefore + // also may be suitable for folding below. 
+ int TwoAddressOpc = SystemZ::getTwoOperandOpcode(Opcode); + if (TwoAddressOpc != -1) { + MachineOperand &DstMO = MI.getOperand(0); + MachineOperand &LHSMO = MI.getOperand(1); + // Only try this with virtual registers and a provided VRM. + if (TRI->isVirtualRegister(DstMO.getReg()) && + TRI->isVirtualRegister(LHSMO.getReg()) && VRM != nullptr && + VRM->hasPhys(DstMO.getReg()) && + VRM->getPhys(DstMO.getReg()) == VRM->getPhys(LHSMO.getReg()) && + !SystemZ::GRH32BitRegClass.contains(VRM->getPhys(DstMO.getReg()))) + Opcode = TwoAddressOpc; + } + // If the spilled operand is the final one, try to change R // into . int MemOpcode = SystemZ::getMemOpcode(Opcode); Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -81,7 +81,8 @@ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); @@ -138,6 +139,51 @@ } } + if (VRM == nullptr) + return BaseImplRetVal; + + // Add any two address hints after any copy hints. 
+ SmallSet TwoAddrHints; + for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) + if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) { + const MachineOperand *VRRegMO = nullptr; + const MachineOperand *OtherMO = nullptr; + const MachineOperand *CommuMO = nullptr; + if (VirtReg == Use.getOperand(0).getReg()) { + VRRegMO = &Use.getOperand(0); + OtherMO = &Use.getOperand(1); + if (Use.isCommutable()) + CommuMO = &Use.getOperand(2); + } else if (VirtReg == Use.getOperand(1).getReg()) { + VRRegMO = &Use.getOperand(1); + OtherMO = &Use.getOperand(0); + } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) { + VRRegMO = &Use.getOperand(2); + OtherMO = &Use.getOperand(0); + } else + continue; + + auto tryAddHint = [&](const MachineOperand *MO) -> void { + unsigned Reg = MO->getReg(); + unsigned PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); + if (PhysReg) { + if (MO->getSubReg()) + PhysReg = getSubReg(PhysReg, MO->getSubReg()); + if (VRRegMO->getSubReg()) + PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), + MRI->getRegClass(VirtReg)); + if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) + TwoAddrHints.insert(PhysReg); + } + }; + tryAddHint(OtherMO); + if (CommuMO) + tryAddHint(CommuMO); + } + for (MCPhysReg OrderReg : Order) + if (TwoAddrHints.count(OrderReg)) + Hints.push_back(OrderReg); + return BaseImplRetVal; } Index: lib/Target/SystemZ/SystemZShortenInst.cpp =================================================================== --- lib/Target/SystemZ/SystemZShortenInst.cpp +++ lib/Target/SystemZ/SystemZShortenInst.cpp @@ -299,6 +299,31 @@ case SystemZ::VST64: Changed |= shortenOn0(MI, SystemZ::STD); break; + + default: { + int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode()); + if (TwoOperandOpcode == -1) + break; + + if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) && + (!MI.isCommutable() || + MI.getOperand(0).getReg() != MI.getOperand(2).getReg() || + 
!TII->commuteInstruction(MI, false, 1, 2))) + break; + + MI.setDesc(TII->get(TwoOperandOpcode)); + MI.tieOperands(0, 1); + if (TwoOperandOpcode == SystemZ::SLL || + TwoOperandOpcode == SystemZ::SLA || + TwoOperandOpcode == SystemZ::SRL || + TwoOperandOpcode == SystemZ::SRA) { + // These shifts only use the low 6 bits of the shift count. + MachineOperand &ImmMO = MI.getOperand(3); + ImmMO.setImm(ImmMO.getImm() & 0xfff); + } + Changed = true; + break; + } } LiveRegs.stepBackward(MI); Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -350,7 +350,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -4758,7 +4758,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, LiveIntervals *LIS) const { + int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { // Check switch flag if (NoFusing) return nullptr; Index: test/CodeGen/SystemZ/asm-18.ll =================================================================== --- test/CodeGen/SystemZ/asm-18.ll +++ test/CodeGen/SystemZ/asm-18.ll @@ -608,8 +608,8 @@ ; CHECK-LABEL: f28: ; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14 ; CHECK: stepa %r2, [[REG1]] -; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254 -; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32 +; CHECK: risbhg [[REG1]], [[REG1]], 
0, 159, 32 +; CHECK: aih [[REG1]], 254 ; CHECK: stepb [[REG1]], [[REG2]] ; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0 ; CHECK: aih [[REG3]], 127 Index: test/CodeGen/SystemZ/codegenprepare-splitstore.ll =================================================================== --- test/CodeGen/SystemZ/codegenprepare-splitstore.ll +++ test/CodeGen/SystemZ/codegenprepare-splitstore.ll @@ -5,9 +5,9 @@ define void @fun(i16* %Src, i16* %Dst) { ; CHECK-LABEL: # %bb.0: ; CHECK: lh %r0, 0(%r2) +; CHECK-NEXT: srlk %r1, %r0, 8 ; CHECK-NEXT: stc %r0, 1(%r3) -; CHECK-NEXT: srl %r0, 8 -; CHECK-NEXT: stc %r0, 0(%r3) +; CHECK-NEXT: stc %r1, 0(%r3) ; CHECK-NEXT: br %r14 %1 = load i16, i16* %Src %2 = trunc i16 %1 to i8 Index: test/CodeGen/SystemZ/ctpop-01.ll =================================================================== --- test/CodeGen/SystemZ/ctpop-01.ll +++ test/CodeGen/SystemZ/ctpop-01.ll @@ -9,10 +9,10 @@ ; CHECK-LABEL: f1: ; CHECK: popcnt %r0, %r2 ; CHECK: sllk %r1, %r0, 16 -; CHECK: ar %r1, %r0 -; CHECK: sllk %r2, %r1, 8 -; CHECK: ar %r2, %r1 -; CHECK: srl %r2, 24 +; CHECK: ar %r0, %r1 +; CHECK: sllk %r1, %r0, 8 +; CHECK: ar %r0, %r1 +; CHECK: srlk %r2, %r0, 24 ; CHECK: br %r14 %popcnt = call i32 @llvm.ctpop.i32(i32 %a) @@ -23,9 +23,9 @@ ; CHECK-LABEL: f2: ; CHECK: llhr %r0, %r2 ; CHECK: popcnt %r0, %r0 -; CHECK: risblg %r2, %r0, 16, 151, 8 -; CHECK: ar %r2, %r0 -; CHECK: srl %r2, 8 +; CHECK: risblg %r1, %r0, 16, 151, 8 +; CHECK: ar %r0, %r1 +; CHECK: srlk %r2, %r0, 8 ; CHECK: br %r14 %and = and i32 %a, 65535 %popcnt = call i32 @llvm.ctpop.i32(i32 %and) @@ -46,12 +46,12 @@ ; CHECK-LABEL: f4: ; CHECK: popcnt %r0, %r2 ; CHECK: sllg %r1, %r0, 32 -; CHECK: agr %r1, %r0 -; CHECK: sllg %r0, %r1, 16 +; CHECK: agr %r0, %r1 +; CHECK: sllg %r1, %r0, 16 ; CHECK: agr %r0, %r1 ; CHECK: sllg %r1, %r0, 8 -; CHECK: agr %r1, %r0 -; CHECK: srlg %r2, %r1, 56 +; CHECK: agr %r0, %r1 +; CHECK: srlg %r2, %r0, 56 ; CHECK: br %r14 %popcnt = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %popcnt @@ 
-76,8 +76,8 @@ ; CHECK: llghr %r0, %r2 ; CHECK: popcnt %r0, %r0 ; CHECK: risbg %r1, %r0, 48, 183, 8 -; CHECK: agr %r1, %r0 -; CHECK: srlg %r2, %r1, 8 +; CHECK: agr %r0, %r1 +; CHECK: srlg %r2, %r0, 8 ; CHECK: br %r14 %and = and i64 %a, 65535 %popcnt = call i64 @llvm.ctpop.i64(i64 %and) Index: test/CodeGen/SystemZ/int-add-05.ll =================================================================== --- test/CodeGen/SystemZ/int-add-05.ll +++ test/CodeGen/SystemZ/int-add-05.ll @@ -1,7 +1,7 @@ ; Test 64-bit addition in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s --check-prefixes=CHECK,Z10 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s --check-prefixes=CHECK,Z196 declare i64 @foo() @@ -97,10 +97,12 @@ } ; Check that additions of spilled values can use AG rather than AGR. +; Note: Z196 is suboptimal with one unfolded reload. define i64 @f9(i64 *%ptr0) { ; CHECK-LABEL: f9: ; CHECK: brasl %r14, foo@PLT -; CHECK: ag %r2, 160(%r15) +; Z10: ag %r2, 168(%r15) +; Z196: ag %r0, 168(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/int-sub-11.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/int-sub-11.ll @@ -0,0 +1,22 @@ +; Test of subtraction that involves a constant as the first operand +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check highest 16-bit signed int immediate value. +define i64 @f1(i64 %a) { +; CHECK-LABEL: f1: +; CHECK: lghi %r0, 32767 +; CHECK: sgrk %r2, %r0, %r2 +; CHECK: br %r14 + %sub = sub i64 32767, %a + ret i64 %sub +} +; Check highest 32-bit signed int immediate value. 
+define i64 @f2(i64 %a) { +; CHECK-LABEL: f2: +; CHECK: lgfi %r0, 2147483647 +; CHECK: sgrk %r2, %r0, %r2 +; CHECK: br %r14 + %sub = sub i64 2147483647, %a + ret i64 %sub +} Index: test/CodeGen/SystemZ/scalar-ctlz.ll =================================================================== --- test/CodeGen/SystemZ/scalar-ctlz.ll +++ test/CodeGen/SystemZ/scalar-ctlz.ll @@ -55,10 +55,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llghr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -16 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, %r0, -16 ; CHECK-NEXT: br %r14 %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false) ret i16 %1 @@ -69,10 +68,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llghr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -16 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, %r0, -16 ; CHECK-NEXT: br %r14 %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true) ret i16 %1 @@ -83,10 +81,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llgcr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -24 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, %r0, -24 ; CHECK-NEXT: br %r14 %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false) ret i8 %1 @@ -97,10 +94,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llgcr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -24 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, %r0, -24 ; CHECK-NEXT: br %r14 %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true) ret i8 %1 Index: test/CodeGen/SystemZ/store_nonbytesized_vecs.ll =================================================================== 
--- test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -75,17 +75,17 @@ ; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: vlgvf %r3, %v26, 1 -; CHECK-NEXT: vlgvf %r1, %v26, 2 -; CHECK-NEXT: risbgn %r4, %r3, 0, 129, 62 -; CHECK-NEXT: rosbg %r4, %r1, 2, 32, 31 +; CHECK-DAG: vlgvf [[REG11:%r[0-9]+]], %v26, 1 +; CHECK-DAG: vlgvf [[REG12:%r[0-9]+]], %v26, 2 +; CHECK-DAG: risbgn [[REG13:%r[0-9]+]], [[REG11]], 0, 129, 62 +; CHECK-DAG: rosbg [[REG13]], [[REG12]], 2, 32, 31 ; CHECK-DAG: vlgvf %r0, %v26, 3 -; CHECK-DAG: rosbg %r4, %r0, 33, 63, 0 +; CHECK-DAG: rosbg [[REG13]], %r0, 33, 63, 0 ; CHECK-DAG: stc %r0, 30(%r2) -; CHECK-DAG: srl %r0, 8 +; CHECK-DAG: srlk %r1, %r0, 8 ; CHECK-DAG: vlgvf [[REG0:%r[0-9]+]], %v24, 1 ; CHECK-DAG: vlgvf [[REG1:%r[0-9]+]], %v24, 0 -; CHECK-DAG: sth %r0, 28(%r2) +; CHECK-DAG: sth %r1, 28(%r2) ; CHECK-DAG: vlgvf [[REG2:%r[0-9]+]], %v24, 2 ; CHECK-DAG: risbgn [[REG3:%r[0-9]+]], [[REG0]], 0, 133, 58 ; CHECK-DAG: rosbg [[REG3]], [[REG2]], 6, 36, 27 @@ -95,18 +95,18 @@ ; CHECK-DAG: rosbg [[REG3]], [[REG5]], 37, 63, 60 ; CHECK-DAG: sllg [[REG6:%r[0-9]+]], [[REG4]], 8 ; CHECK-DAG: rosbg [[REG6]], [[REG3]], 56, 63, 8 -; CHECK-NEXT: stg [[REG6]], 0(%r2) -; CHECK-NEXT: srlg [[REG7:%r[0-9]+]], %r4, 24 -; CHECK-NEXT: st [[REG7]], 24(%r2) -; CHECK-NEXT: vlgvf [[REG8:%r[0-9]+]], %v26, 0 -; CHECK-NEXT: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60 -; CHECK-NEXT: rosbg [[REG10]], [[REG8]], 4, 34, 29 -; CHECK-NEXT: sllg [[REG9:%r[0-9]+]], [[REG3]], 8 -; CHECK-NEXT: rosbg [[REG10]], %r3, 35, 63, 62 -; CHECK-NEXT: rosbg [[REG9]], [[REG10]], 56, 63, 8 -; CHECK-NEXT: stg [[REG9]], 8(%r2) -; CHECK-NEXT: sllg %r0, [[REG10]], 8 -; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8 +; CHECK-DAG: stg [[REG6]], 0(%r2) +; CHECK-DAG: srlg [[REG7:%r[0-9]+]], [[REG13]], 24 +; CHECK-DAG: st [[REG7]], 24(%r2) +; CHECK-DAG: vlgvf [[REG8:%r[0-9]+]], %v26, 0 
+; CHECK-DAG: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60 +; CHECK-DAG: rosbg [[REG10]], [[REG8]], 4, 34, 29 +; CHECK-DAG: sllg [[REG9:%r[0-9]+]], [[REG3]], 8 +; CHECK-DAG: rosbg [[REG10]], [[REG11]], 35, 63, 62 +; CHECK-DAG: rosbg [[REG9]], [[REG10]], 56, 63, 8 +; CHECK-DAG: stg [[REG9]], 8(%r2) +; CHECK-DAG: sllg %r0, [[REG10]], 8 +; CHECK-DAG: rosbg %r0, [[REG13]], 56, 63, 8 ; CHECK-NEXT: stg %r0, 16(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 Index: test/CodeGen/SystemZ/vec-combine-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-combine-02.ll +++ test/CodeGen/SystemZ/vec-combine-02.ll @@ -408,7 +408,7 @@ ; CHECK-NOT: vmrh ; CHECK: ar {{%r[0-5]}}, ; CHECK: ar {{%r[0-5]}}, -; CHECK: or %r2, +; CHECK: ork %r2, ; CHECK: br %r14 %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0 %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0