Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/ErrorHandling.h" @@ -932,9 +933,12 @@ /// operand folded, otherwise NULL is returned. /// The new instruction is inserted before MI, and the client is responsible /// for removing the old instruction. + /// If VRM is passed, the assigned physregs can be inspected by target to + /// decide on using an opcode (note that those assignments can still change). MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS = nullptr) const; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. @@ -1024,7 +1028,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const { + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const { return nullptr; } Index: include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- include/llvm/CodeGen/TargetPassConfig.h +++ include/llvm/CodeGen/TargetPassConfig.h @@ -386,6 +386,11 @@ return false; } + /// Add passes to be run immediately after virtual registers are rewritten + /// to physical registers. These passes may replace an MI with a new one, + /// but should preserve SlotIndexes while doing so. 
+ virtual void addPostRewrite() { } + /// This method may be implemented by targets that want to run passes after /// register allocation pass pipeline but before prolog-epilog insertion. virtual void addPostRegAlloc() { } Index: lib/CodeGen/InlineSpiller.cpp =================================================================== --- lib/CodeGen/InlineSpiller.cpp +++ lib/CodeGen/InlineSpiller.cpp @@ -837,7 +837,7 @@ MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); if (!FoldMI) return false; Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -524,7 +524,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS) const { + LiveIntervals *LIS, + VirtRegMap *VRM) const { auto Flags = MachineMemOperand::MONone; for (unsigned OpIdx : Ops) Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore @@ -570,7 +571,7 @@ MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM); } if (NewMI) { Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -1168,6 +1168,10 @@ addPass(&MachineSchedulerID); if (addRegAssignmentOptimized()) { + // Allow targets to expand pseudo instructions depending on the choice of + // registers before MachineCopyPropagation. + addPostRewrite(); + // Copy propagate to forward register uses and try to eliminate COPYs that // were not coalesced. 
addPass(&MachineCopyPropagationID); Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -162,7 +162,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// \returns true if a branch from an instruction with opcode \p BranchOpc /// bytes is capable of jumping to a position \p BrOffset bytes away. Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3049,7 +3049,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack. 
Consider this instruction: // // %0 = COPY %sp; GPR64all:%0 Index: lib/Target/SystemZ/CMakeLists.txt =================================================================== --- lib/Target/SystemZ/CMakeLists.txt +++ lib/Target/SystemZ/CMakeLists.txt @@ -30,6 +30,7 @@ SystemZMCInstLower.cpp SystemZRegisterInfo.cpp SystemZSelectionDAGInfo.cpp + SystemZPostRewrite.cpp SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp Index: lib/Target/SystemZ/SystemZ.h =================================================================== --- lib/Target/SystemZ/SystemZ.h +++ lib/Target/SystemZ/SystemZ.h @@ -194,6 +194,7 @@ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -37,6 +37,12 @@ string OpKey = ""; string OpType = "none"; + // MemKey identifies a target reg-mem opcode, while MemType can be either + // "pseudo" or "target". This is used to map a pseudo memory instruction to + // its corresponding target opcode. See comment at MemFoldPseudo. + string MemKey = ""; + string MemType = "none"; + // Many distinct-operands instructions have older 2-operand equivalents. // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, // with NumOpsValue being "2" or "3" as appropriate. 
@@ -97,6 +103,12 @@ let TSFlags{20} = IsLogical; } +class Pseudo pattern> + : InstSystemZ<0, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + //===----------------------------------------------------------------------===// // Mappings between instructions //===----------------------------------------------------------------------===// @@ -120,7 +132,8 @@ let ValueCols = [["20"]]; } -// Return the memory form of a register instruction. +// Return the memory form of a register instruction. Note that this may +// return a MemFoldPseudo instruction (see below). def getMemOpcode : InstrMapping { let FilterClass = "InstSystemZ"; let RowFields = ["OpKey"]; @@ -129,13 +142,22 @@ let ValueCols = [["mem"]]; } -// Return the 3-operand form of a 2-operand instruction. -def getThreeOperandOpcode : InstrMapping { +// Return the target memory instruction for a MemFoldPseudo. +def getTargetMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["MemKey"]; + let ColFields = ["MemType"]; + let KeyCol = ["pseudo"]; + let ValueCols = [["target"]]; +} + +// Return the 2-operand form of a 3-operand instruction. 
+def getTwoOperandOpcode : InstrMapping { let FilterClass = "InstSystemZ"; let RowFields = ["NumOpsKey"]; let ColFields = ["NumOpsValue"]; - let KeyCol = ["2"]; - let ValueCols = [["3"]]; + let KeyCol = ["3"]; + let ValueCols = [["2"]]; } //===----------------------------------------------------------------------===// @@ -3066,6 +3088,8 @@ mnemonic#"\t$R1, $R2, $R3", [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> { let M4 = 0; + let OpKey = mnemonic#cls1; + let OpType = "reg"; } multiclass BinaryRRAndK opcode1, bits<16> opcode2, @@ -3073,9 +3097,9 @@ RegisterOperand cls2> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRRFa, + def K : BinaryRRFa, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRR; } } @@ -3085,9 +3109,9 @@ RegisterOperand cls2> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRRFa, + def K : BinaryRRFa, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRRE; } } @@ -3188,9 +3212,9 @@ Immediate imm> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRIE, + def K : BinaryRIE, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRI; } } @@ -3265,9 +3289,9 @@ SDPatternOperator operator, RegisterOperand cls> { let NumOpsKey = mnemonic in { let NumOpsValue = "3" in - def K : BinaryRSY, + def K : BinaryRSY, Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRS; } } @@ -3334,6 +3358,36 @@ let AccessBytes = bytes; } +// A pseudo that is used during register allocation when folding a memory +// operand. The 3-address register instruction with a spilled source cannot +// be converted directly to a target 2-address reg/mem instruction. 
+// Mapping: R -> MemFoldPseudo -> +class MemFoldPseudo bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> { + let OpKey = mnemonic#"rk"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let HasIndex = 1; + let hasNoSchedulingInfo = 1; +} + +multiclass BinaryRXYAndPseudo opcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> { + + def "" : BinaryRXY { + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let Has20BitOffset = 1 in + def _MemFoldPseudo : MemFoldPseudo; +} + multiclass BinaryRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> { @@ -3347,6 +3401,24 @@ } } +multiclass BinaryRXPairAndPseudo rxOpcode, + bits<16> rxyOpcode, SDPatternOperator operator, + RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + def "" : BinaryRX { + let DispSize = "12"; + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let DispSize = "20" in + def Y : BinaryRXY; + } + def _MemFoldPseudo : MemFoldPseudo; +} + class BinarySI opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr12only> : InstSI pattern> - : InstSystemZ<0, outs, ins, "", pattern> { - let isPseudo = 1; - let isCodeGenOnly = 1; -} - // Like UnaryRI, but expanded after RA depending on the choice of register. 
class UnaryRIPseudo @@ -4593,9 +4659,9 @@ RegisterOperand cls, Immediate imm> { let NumOpsKey = key in { let NumOpsValue = "3" in - def K : BinaryRIEPseudo, + def K : BinaryRIEPseudo, Requires<[FeatureHighWord, FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + let NumOpsValue = "2" in def "" : BinaryRIPseudo, Requires<[FeatureHighWord]>; } Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -141,6 +141,11 @@ } // end namespace SystemZII +namespace SystemZ { +int getTwoOperandOpcode(uint16_t Opcode); +int getTargetMemOpcode(uint16_t Opcode); +} + class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; SystemZSubtarget &STI; @@ -248,7 +253,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -957,73 +957,13 @@ } } -// Used to return from convertToThreeAddress after replacing two-address -// instruction OldMI with three-address instruction NewMI. 
-static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI, - MachineInstr *NewMI, - LiveVariables *LV) { - if (LV) { - unsigned NumOps = OldMI->getNumOperands(); - for (unsigned I = 1; I < NumOps; ++I) { - MachineOperand &Op = OldMI->getOperand(I); - if (Op.isReg() && Op.isKill()) - LV->replaceKillInstruction(Op.getReg(), *OldMI, *NewMI); - } - } - transferDeadCC(OldMI, NewMI); - return NewMI; -} - MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { MachineBasicBlock *MBB = MI.getParent(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - unsigned Opcode = MI.getOpcode(); - unsigned NumOps = MI.getNumOperands(); - - // Try to convert something like SLL into SLLK, if supported. - // We prefer to keep the two-operand form where possible both - // because it tends to be shorter and because some instructions - // have memory forms that can be used during spilling. - if (STI.hasDistinctOps()) { - MachineOperand &Dest = MI.getOperand(0); - MachineOperand &Src = MI.getOperand(1); - unsigned DestReg = Dest.getReg(); - unsigned SrcReg = Src.getReg(); - // AHIMux is only really a three-operand instruction when both operands - // are low registers. Try to constrain both operands to be low if - // possible. - if (Opcode == SystemZ::AHIMux && - TargetRegisterInfo::isVirtualRegister(DestReg) && - TargetRegisterInfo::isVirtualRegister(SrcReg) && - MRI.getRegClass(DestReg)->contains(SystemZ::R1L) && - MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) { - MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass); - MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass); - } - int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); - if (ThreeOperandOpcode >= 0) { - // Create three address instruction without adding the implicit - // operands. Those will instead be copied over from the original - // instruction by the loop below. 
- MachineInstrBuilder MIB( - *MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(), - /*NoImplicit=*/true)); - MIB.add(Dest); - // Keep the kill state, but drop the tied flag. - MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); - // Keep the remaining operands as-is. - for (unsigned I = 2; I < NumOps; ++I) - MIB.add(MI.getOperand(I)); - MBB->insert(MI, MIB); - return finishConvertToThreeAddress(&MI, MIB, LV); - } - } // Try to convert an AND into an RISBG-type instruction. - if (LogicOp And = interpretAndImmediate(Opcode)) { + // TODO: It might be beneficial to select RISBG and shorten to AND instead. + if (LogicOp And = interpretAndImmediate(MI.getOpcode())) { uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB; // AND IMMEDIATE leaves the other bits of the register unchanged. Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); @@ -1051,7 +991,16 @@ .addImm(Start) .addImm(End + 128) .addImm(0); - return finishConvertToThreeAddress(&MI, MIB, LV); + if (LV) { + unsigned NumOps = MI.getNumOperands(); + for (unsigned I = 1; I < NumOps; ++I) { + MachineOperand &Op = MI.getOperand(I); + if (Op.isReg() && Op.isKill()) + LV->replaceKillInstruction(Op.getReg(), MI, *MIB); + } + } + transferDeadCC(&MI, MIB); + return MIB; } } return nullptr; @@ -1060,7 +1009,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); @@ -1214,12 +1163,37 @@ } } - // If the spilled operand is the final one, try to change R - // into . + // If the spilled operand is the final one or the instruction is + // commutable, try to change R into . 
+ unsigned NumOps = MI.getNumExplicitOperands(); int MemOpcode = SystemZ::getMemOpcode(Opcode); + + // See if this is a 3-address instruction that is convertible to 2-address + // and suitable for folding below. Only try this with virtual registers + // and a provided VRM (during regalloc). + bool NeedsCommute = false; + if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) { + if (VRM == nullptr) + MemOpcode = -1; + else { + assert(NumOps == 3 && "Expected two source registers."); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned DstPhys = + (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); + unsigned SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg() + : ((OpNum == 1 && MI.isCommutable()) + ? MI.getOperand(2).getReg() + : 0)); + if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg && + TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg)) + NeedsCommute = (OpNum == 1); + else + MemOpcode = -1; + } + } + if (MemOpcode >= 0) { - unsigned NumOps = MI.getNumExplicitOperands(); - if (OpNum == NumOps - 1) { + if ((OpNum == NumOps - 1) || NeedsCommute) { const MCInstrDesc &MemDesc = get(MemOpcode); uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); assert(AccessBytes != 0 && "Size of access should be known"); @@ -1227,8 +1201,12 @@ uint64_t Offset = Size - AccessBytes; MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(MemOpcode)); - for (unsigned I = 0; I < OpNum; ++I) - MIB.add(MI.getOperand(I)); + MIB.add(MI.getOperand(0)); + if (NeedsCommute) + MIB.add(MI.getOperand(2)); + else + for (unsigned I = 1; I < OpNum; ++I) + MIB.add(MI.getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ 
-916,11 +916,11 @@ // Addition of memory. defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>; - defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; + defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>; - def AG : BinaryRXY<"ag", 0xE308, z_sadd, GR64, load, 8>; + defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>; // Addition to memory. def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; @@ -958,9 +958,9 @@ Requires<[FeatureHighWord]>; // Addition of memory. - defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; + defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>; - def ALG : BinaryRXY<"alg", 0xE30A, z_uadd, GR64, load, 8>; + defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>; // Addition to memory. def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>; @@ -1003,11 +1003,11 @@ // Subtraction of memory. defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>; - defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; + defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>; - def SG : BinaryRXY<"sg", 0xE309, z_ssub, GR64, load, 8>; + defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>; } defm : SXB; @@ -1055,9 +1055,9 @@ def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>; // Subtraction of memory. 
- defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; + defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>; - def SLG : BinaryRXY<"slg", 0xE30B, z_usub, GR64, load, 8>; + defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>; } defm : ZXB; @@ -1132,8 +1132,8 @@ // ANDs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>; + defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>; } // AND to memory @@ -1189,8 +1189,8 @@ // ORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>; + defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>; } // OR to memory @@ -1229,8 +1229,8 @@ // XORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>; + defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>; } // XOR to memory Index: lib/Target/SystemZ/SystemZPostRewrite.cpp =================================================================== --- /dev/null +++ lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -0,0 +1,124 @@ +//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that is run immediately after VirtRegRewriter +// but before MachineCopyPropagation. The purpose is to lower pseudos to +// target instructions before any later pass might substitute a register for +// another. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass" + +#define DEBUG_TYPE "systemz-postrewrite" +STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); + +namespace llvm { + void initializeSystemZPostRewritePass(PassRegistry&); +} + +namespace { + +class SystemZPostRewrite : public MachineFunctionPass { +public: + static char ID; + SystemZPostRewrite() : MachineFunctionPass(ID) { + initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry()); + } + + const SystemZInstrInfo *TII; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool selectMBB(MachineBasicBlock &MBB); +}; + +char SystemZPostRewrite::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite", + SYSTEMZ_POSTREWRITE_NAME, false, false) + +/// Returns an instance of the Post Rewrite pass. 
+FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) { + return new SystemZPostRewrite(); +} + +/// If MBBI references a pseudo instruction that should be selected here, +/// do it and return true. Otherwise return false. +bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + + // Note: If this could be done during regalloc in foldMemoryOperandImpl() + // while also updating the LiveIntervals, there would be no need for the + // MemFoldPseudo to begin with. + int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode); + if (TargetMemOpcode != -1) { + MI.setDesc(TII->get(TargetMemOpcode)); + MI.tieOperands(0, 1); + unsigned DstReg = MI.getOperand(0).getReg(); + MachineOperand &SrcMO = MI.getOperand(1); + if (DstReg != SrcMO.getReg()) { + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg) + .addReg(SrcMO.getReg()); + SrcMO.setReg(DstReg); + MemFoldCopies++; + } + return true; + } + + return false; +} + +/// Iterate over the instructions in basic block MBB and select any +/// pseudo instructions. Return true if anything was modified. 
+bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= selectMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= selectMBB(MBB); + + return Modified; +} + Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -81,7 +81,8 @@ const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + const SystemZSubtarget &Subtarget = MF.getSubtarget(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( VirtReg, Order, Hints, MF, VRM, Matrix); @@ -138,6 +139,51 @@ } } + if (VRM == nullptr) + return BaseImplRetVal; + + // Add any two address hints after any copy hints. 
+ SmallSet TwoAddrHints; + for (auto &Use : MRI->reg_nodbg_instructions(VirtReg)) + if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) { + const MachineOperand *VRRegMO = nullptr; + const MachineOperand *OtherMO = nullptr; + const MachineOperand *CommuMO = nullptr; + if (VirtReg == Use.getOperand(0).getReg()) { + VRRegMO = &Use.getOperand(0); + OtherMO = &Use.getOperand(1); + if (Use.isCommutable()) + CommuMO = &Use.getOperand(2); + } else if (VirtReg == Use.getOperand(1).getReg()) { + VRRegMO = &Use.getOperand(1); + OtherMO = &Use.getOperand(0); + } else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) { + VRRegMO = &Use.getOperand(2); + OtherMO = &Use.getOperand(0); + } else + continue; + + auto tryAddHint = [&](const MachineOperand *MO) -> void { + unsigned Reg = MO->getReg(); + unsigned PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg); + if (PhysReg) { + if (MO->getSubReg()) + PhysReg = getSubReg(PhysReg, MO->getSubReg()); + if (VRRegMO->getSubReg()) + PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(), + MRI->getRegClass(VirtReg)); + if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg)) + TwoAddrHints.insert(PhysReg); + } + }; + tryAddHint(OtherMO); + if (CommuMO) + tryAddHint(CommuMO); + } + for (MCPhysReg OrderReg : Order) + if (TwoAddrHints.count(OrderReg)) + Hints.push_back(OrderReg); + return BaseImplRetVal; } Index: lib/Target/SystemZ/SystemZShortenInst.cpp =================================================================== --- lib/Target/SystemZ/SystemZShortenInst.cpp +++ lib/Target/SystemZ/SystemZShortenInst.cpp @@ -299,6 +299,31 @@ case SystemZ::VST64: Changed |= shortenOn0(MI, SystemZ::STD); break; + + default: { + int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode()); + if (TwoOperandOpcode == -1) + break; + + if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) && + (!MI.isCommutable() || + MI.getOperand(0).getReg() != MI.getOperand(2).getReg() || + 
!TII->commuteInstruction(MI, false, 1, 2))) + break; + + MI.setDesc(TII->get(TwoOperandOpcode)); + MI.tieOperands(0, 1); + if (TwoOperandOpcode == SystemZ::SLL || + TwoOperandOpcode == SystemZ::SLA || + TwoOperandOpcode == SystemZ::SRL || + TwoOperandOpcode == SystemZ::SRA) { + // Only the low 6 bits of the shift count are used, so a 12-bit mask is safe. + MachineOperand &ImmMO = MI.getOperand(3); + ImmMO.setImm(ImmMO.getImm() & 0xfff); + } + Changed = true; + break; + } } LiveRegs.stepBackward(MI); Index: lib/Target/SystemZ/SystemZTargetMachine.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetMachine.cpp +++ lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -183,6 +183,7 @@ void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; + void addPostRewrite() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -212,7 +213,16 @@ return true; } +void SystemZPassConfig::addPostRewrite() { + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); +} + void SystemZPassConfig::addPreSched2() { + // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() + // is not called). 
+ if (getOptLevel() == CodeGenOpt::None) + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); + addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); if (getOptLevel() != CodeGenOpt::None) Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -350,7 +350,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -4773,7 +4773,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, LiveIntervals *LIS) const { + int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { // Check switch flag if (NoFusing) return nullptr; Index: test/CodeGen/SystemZ/asm-18.ll =================================================================== --- test/CodeGen/SystemZ/asm-18.ll +++ test/CodeGen/SystemZ/asm-18.ll @@ -603,13 +603,13 @@ } ; Test three-operand halfword immediate addition involving mixtures of low -; and high registers. RISBHG/AIH would be OK too, instead of AHIK/RISBHG. +; and high registers. AHIK/RISBHG would be OK too, instead of RISBHG/AIH. 
define i32 @f28(i32 %old) { ; CHECK-LABEL: f28: ; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14 ; CHECK: stepa %r2, [[REG1]] -; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254 -; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32 +; CHECK: risbhg [[REG1]], [[REG1]], 0, 159, 32 +; CHECK: aih [[REG1]], 254 ; CHECK: stepb [[REG1]], [[REG2]] ; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0 ; CHECK: aih [[REG3]], 127 Index: test/CodeGen/SystemZ/codegenprepare-splitstore.ll =================================================================== --- test/CodeGen/SystemZ/codegenprepare-splitstore.ll +++ test/CodeGen/SystemZ/codegenprepare-splitstore.ll @@ -5,9 +5,9 @@ define void @fun(i16* %Src, i16* %Dst) { ; CHECK-LABEL: # %bb.0: ; CHECK: lh %r0, 0(%r2) +; CHECK-NEXT: srlk %r1, %r0, 8 ; CHECK-NEXT: stc %r0, 1(%r3) -; CHECK-NEXT: srl %r0, 8 -; CHECK-NEXT: stc %r0, 0(%r3) +; CHECK-NEXT: stc %r1, 0(%r3) ; CHECK-NEXT: br %r14 %1 = load i16, i16* %Src %2 = trunc i16 %1 to i8 Index: test/CodeGen/SystemZ/ctpop-01.ll =================================================================== --- test/CodeGen/SystemZ/ctpop-01.ll +++ test/CodeGen/SystemZ/ctpop-01.ll @@ -9,10 +9,10 @@ ; CHECK-LABEL: f1: ; CHECK: popcnt %r0, %r2 ; CHECK: sllk %r1, %r0, 16 -; CHECK: ar %r1, %r0 -; CHECK: sllk %r2, %r1, 8 -; CHECK: ar %r2, %r1 -; CHECK: srl %r2, 24 +; CHECK: ar %r0, %r1 +; CHECK: sllk %r1, %r0, 8 +; CHECK: ar %r0, %r1 +; CHECK: srlk %r2, %r0, 24 ; CHECK: br %r14 %popcnt = call i32 @llvm.ctpop.i32(i32 %a) @@ -23,9 +23,9 @@ ; CHECK-LABEL: f2: ; CHECK: llhr %r0, %r2 ; CHECK: popcnt %r0, %r0 -; CHECK: risblg %r2, %r0, 16, 151, 8 -; CHECK: ar %r2, %r0 -; CHECK: srl %r2, 8 +; CHECK: risblg %r1, %r0, 16, 151, 8 +; CHECK: ar %r0, %r1 +; CHECK: srlk %r2, %r0, 8 ; CHECK: br %r14 %and = and i32 %a, 65535 %popcnt = call i32 @llvm.ctpop.i32(i32 %and) @@ -46,12 +46,12 @@ ; CHECK-LABEL: f4: ; CHECK: popcnt %r0, %r2 ; CHECK: sllg %r1, %r0, 32 -; CHECK: agr %r1, %r0 -; CHECK: sllg %r0, %r1, 16 +; CHECK: agr %r0, %r1 +; 
CHECK: sllg %r1, %r0, 16 ; CHECK: agr %r0, %r1 ; CHECK: sllg %r1, %r0, 8 -; CHECK: agr %r1, %r0 -; CHECK: srlg %r2, %r1, 56 +; CHECK: agr %r0, %r1 +; CHECK: srlg %r2, %r0, 56 ; CHECK: br %r14 %popcnt = call i64 @llvm.ctpop.i64(i64 %a) ret i64 %popcnt @@ -76,8 +76,8 @@ ; CHECK: llghr %r0, %r2 ; CHECK: popcnt %r0, %r0 ; CHECK: risbg %r1, %r0, 48, 183, 8 -; CHECK: agr %r1, %r0 -; CHECK: srlg %r2, %r1, 8 +; CHECK: agr %r0, %r1 +; CHECK: srlg %r2, %r0, 8 ; CHECK: br %r14 %and = and i64 %a, 65535 %popcnt = call i64 @llvm.ctpop.i64(i64 %and) Index: test/CodeGen/SystemZ/int-add-05.ll =================================================================== --- test/CodeGen/SystemZ/int-add-05.ll +++ test/CodeGen/SystemZ/int-add-05.ll @@ -1,7 +1,7 @@ ; Test 64-bit addition in which the second operand is variable. ; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s --check-prefixes=CHECK,Z10 +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s --check-prefixes=CHECK,Z196 declare i64 @foo() @@ -97,10 +97,12 @@ } ; Check that additions of spilled values can use AG rather than AGR. +; Note: Z196 is suboptimal with one unfolded reload. define i64 @f9(i64 *%ptr0) { ; CHECK-LABEL: f9: ; CHECK: brasl %r14, foo@PLT -; CHECK: ag %r2, 160(%r15) +; Z10: ag %r2, 168(%r15) +; Z196: ag %r0, 168(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/int-sub-11.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/int-sub-11.ll @@ -0,0 +1,22 @@ +; Test of subtraction that involves a constant as the first operand +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +; Check highest 16-bit signed int immediate value. 
+define i64 @f1(i64 %a) { +; CHECK-LABEL: f1: +; CHECK: lghi %r0, 32767 +; CHECK: sgrk %r2, %r0, %r2 +; CHECK: br %r14 + %sub = sub i64 32767, %a + ret i64 %sub +} +; Check highest 32-bit signed int immediate value. +define i64 @f2(i64 %a) { +; CHECK-LABEL: f2: +; CHECK: lgfi %r0, 2147483647 +; CHECK: sgrk %r2, %r0, %r2 +; CHECK: br %r14 + %sub = sub i64 2147483647, %a + ret i64 %sub +} Index: test/CodeGen/SystemZ/scalar-ctlz.ll =================================================================== --- test/CodeGen/SystemZ/scalar-ctlz.ll +++ test/CodeGen/SystemZ/scalar-ctlz.ll @@ -55,10 +55,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llghr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -16 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, %r0, -16 ; CHECK-NEXT: br %r14 %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false) ret i16 %1 @@ -69,10 +68,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llghr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -16 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, %r0, -16 ; CHECK-NEXT: br %r14 %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true) ret i16 %1 @@ -83,10 +81,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llgcr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -24 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, %r0, -24 ; CHECK-NEXT: br %r14 %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false) ret i8 %1 @@ -97,10 +94,9 @@ ; CHECK-LABEL: %bb.0: ; CHECK-NEXT: # kill ; CHECK-NEXT: llgcr %r0, %r2 -; CHECK-NEXT: flogr %r2, %r0 -; CHECK-NEXT: aghi %r2, -32 -; CHECK-NEXT: ahi %r2, -24 -; CHECK-NEXT: # kill +; CHECK-NEXT: flogr %r0, %r0 +; CHECK-NEXT: aghi %r0, -32 +; CHECK-NEXT: ahik %r2, 
%r0, -24 ; CHECK-NEXT: br %r14 %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true) ret i8 %1 Index: test/CodeGen/SystemZ/store_nonbytesized_vecs.ll =================================================================== --- test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -75,17 +75,17 @@ ; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: vlgvf %r3, %v26, 1 -; CHECK-NEXT: vlgvf %r1, %v26, 2 -; CHECK-NEXT: risbgn %r4, %r3, 0, 129, 62 -; CHECK-NEXT: rosbg %r4, %r1, 2, 32, 31 +; CHECK-DAG: vlgvf [[REG11:%r[0-9]+]], %v26, 1 +; CHECK-DAG: vlgvf [[REG12:%r[0-9]+]], %v26, 2 +; CHECK-DAG: risbgn [[REG13:%r[0-9]+]], [[REG11]], 0, 129, 62 +; CHECK-DAG: rosbg [[REG13]], [[REG12]], 2, 32, 31 ; CHECK-DAG: vlgvf %r0, %v26, 3 -; CHECK-DAG: rosbg %r4, %r0, 33, 63, 0 +; CHECK-DAG: rosbg [[REG13]], %r0, 33, 63, 0 ; CHECK-DAG: stc %r0, 30(%r2) -; CHECK-DAG: srl %r0, 8 +; CHECK-DAG: srlk %r1, %r0, 8 ; CHECK-DAG: vlgvf [[REG0:%r[0-9]+]], %v24, 1 ; CHECK-DAG: vlgvf [[REG1:%r[0-9]+]], %v24, 0 -; CHECK-DAG: sth %r0, 28(%r2) +; CHECK-DAG: sth %r1, 28(%r2) ; CHECK-DAG: vlgvf [[REG2:%r[0-9]+]], %v24, 2 ; CHECK-DAG: risbgn [[REG3:%r[0-9]+]], [[REG0]], 0, 133, 58 ; CHECK-DAG: rosbg [[REG3]], [[REG2]], 6, 36, 27 @@ -95,18 +95,18 @@ ; CHECK-DAG: rosbg [[REG3]], [[REG5]], 37, 63, 60 ; CHECK-DAG: sllg [[REG6:%r[0-9]+]], [[REG4]], 8 ; CHECK-DAG: rosbg [[REG6]], [[REG3]], 56, 63, 8 -; CHECK-NEXT: stg [[REG6]], 0(%r2) -; CHECK-NEXT: srlg [[REG7:%r[0-9]+]], %r4, 24 -; CHECK-NEXT: st [[REG7]], 24(%r2) -; CHECK-NEXT: vlgvf [[REG8:%r[0-9]+]], %v26, 0 -; CHECK-NEXT: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60 -; CHECK-NEXT: rosbg [[REG10]], [[REG8]], 4, 34, 29 -; CHECK-NEXT: sllg [[REG9:%r[0-9]+]], [[REG3]], 8 -; CHECK-NEXT: rosbg [[REG10]], %r3, 35, 63, 62 -; CHECK-NEXT: rosbg [[REG9]], [[REG10]], 56, 63, 8 -; CHECK-NEXT: stg [[REG9]], 8(%r2) -; CHECK-NEXT: sllg %r0, [[REG10]], 
8 -; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8 +; CHECK-DAG: stg [[REG6]], 0(%r2) +; CHECK-DAG: srlg [[REG7:%r[0-9]+]], [[REG13]], 24 +; CHECK-DAG: st [[REG7]], 24(%r2) +; CHECK-DAG: vlgvf [[REG8:%r[0-9]+]], %v26, 0 +; CHECK-DAG: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60 +; CHECK-DAG: rosbg [[REG10]], [[REG8]], 4, 34, 29 +; CHECK-DAG: sllg [[REG9:%r[0-9]+]], [[REG3]], 8 +; CHECK-DAG: rosbg [[REG10]], [[REG11]], 35, 63, 62 +; CHECK-DAG: rosbg [[REG9]], [[REG10]], 56, 63, 8 +; CHECK-DAG: stg [[REG9]], 8(%r2) +; CHECK-DAG: sllg %r0, [[REG10]], 8 +; CHECK-DAG: rosbg %r0, [[REG13]], 56, 63, 8 ; CHECK-NEXT: stg %r0, 16(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 Index: test/CodeGen/SystemZ/vec-combine-02.ll =================================================================== --- test/CodeGen/SystemZ/vec-combine-02.ll +++ test/CodeGen/SystemZ/vec-combine-02.ll @@ -408,7 +408,7 @@ ; CHECK-NOT: vmrh ; CHECK: ar {{%r[0-5]}}, ; CHECK: ar {{%r[0-5]}}, -; CHECK: or %r2, +; CHECK: ork %r2, ; CHECK: br %r14 %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0 %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0