Index: include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- include/llvm/CodeGen/TargetInstrInfo.h +++ include/llvm/CodeGen/TargetInstrInfo.h @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/ErrorHandling.h" @@ -932,9 +933,12 @@ /// operand folded, otherwise NULL is returned. /// The new instruction is inserted before MI, and the client is responsible /// for removing the old instruction. + /// If VRM is passed, the assigned physregs can be inspected by target to + /// decide on using an opcode (note that those assignments can still change). MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS = nullptr) const; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const; /// Same as the previous version except it allows folding of any load and /// store from / to any address, not just from a specific stack slot. @@ -1024,7 +1028,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const { + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const { return nullptr; } Index: include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- include/llvm/CodeGen/TargetPassConfig.h +++ include/llvm/CodeGen/TargetPassConfig.h @@ -386,6 +386,11 @@ return false; } + /// Add passes to be run immediately after virtual registers are rewritten + /// to physical registers. These passes may replace an MI with a new one, + /// but should preserve SlotIndexes while doing so. 
+ virtual void addPostRewrite() { } + /// This method may be implemented by targets that want to run passes after /// register allocation pass pipeline but before prolog-epilog insertion. virtual void addPostRegAlloc() { } Index: lib/CodeGen/InlineSpiller.cpp =================================================================== --- lib/CodeGen/InlineSpiller.cpp +++ lib/CodeGen/InlineSpiller.cpp @@ -837,7 +837,7 @@ MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) - : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); + : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM); if (!FoldMI) return false; Index: lib/CodeGen/TargetInstrInfo.cpp =================================================================== --- lib/CodeGen/TargetInstrInfo.cpp +++ lib/CodeGen/TargetInstrInfo.cpp @@ -524,7 +524,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, ArrayRef Ops, int FI, - LiveIntervals *LIS) const { + LiveIntervals *LIS, + VirtRegMap *VRM) const { auto Flags = MachineMemOperand::MONone; for (unsigned OpIdx : Ops) Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore @@ -570,7 +571,7 @@ MBB->insert(MI, NewMI); } else { // Ask the target to do the actual folding. - NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS); + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM); } if (NewMI) { Index: lib/CodeGen/TargetPassConfig.cpp =================================================================== --- lib/CodeGen/TargetPassConfig.cpp +++ lib/CodeGen/TargetPassConfig.cpp @@ -1168,6 +1168,10 @@ addPass(&MachineSchedulerID); if (addRegAssignmentOptimized()) { + // Allow targets to expand pseudo instructions depending on the choice of + // registers before MachineCopyPropagation. + addPostRewrite(); + // Copy propagate to forward register uses and try to eliminate COPYs that // were not coalesced. 
addPass(&MachineCopyPropagationID); Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -162,7 +162,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// \returns true if a branch from an instruction with opcode \p BranchOpc /// bytes is capable of jumping to a position \p BrOffset bytes away. Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3049,7 +3049,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { // This is a bit of a hack. 
Consider this instruction: // // %0 = COPY %sp; GPR64all:%0 Index: lib/Target/SystemZ/CMakeLists.txt =================================================================== --- lib/Target/SystemZ/CMakeLists.txt +++ lib/Target/SystemZ/CMakeLists.txt @@ -30,6 +30,7 @@ SystemZMCInstLower.cpp SystemZRegisterInfo.cpp SystemZSelectionDAGInfo.cpp + SystemZPostRewrite.cpp SystemZShortenInst.cpp SystemZSubtarget.cpp SystemZTargetMachine.cpp Index: lib/Target/SystemZ/SystemZ.h =================================================================== --- lib/Target/SystemZ/SystemZ.h +++ lib/Target/SystemZ/SystemZ.h @@ -194,6 +194,7 @@ FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); +FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM); FunctionPass *createSystemZTDCPass(); } // end namespace llvm Index: lib/Target/SystemZ/SystemZInstrFormats.td =================================================================== --- lib/Target/SystemZ/SystemZInstrFormats.td +++ lib/Target/SystemZ/SystemZInstrFormats.td @@ -37,6 +37,12 @@ string OpKey = ""; string OpType = "none"; + // MemKey identifies a target reg-mem opcode, while MemType can be either + // "pseudo" or "target". This is used to map a pseudo memory instruction to + // its corresponding target opcode. See comment at MemFoldPseudo. + string MemKey = ""; + string MemType = "none"; + // Many distinct-operands instructions have older 2-operand equivalents. // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, // with NumOpsValue being "2" or "3" as appropriate. 
@@ -97,6 +103,12 @@ let TSFlags{20} = IsLogical; } +class Pseudo pattern> + : InstSystemZ<0, outs, ins, "", pattern> { + let isPseudo = 1; + let isCodeGenOnly = 1; +} + //===----------------------------------------------------------------------===// // Mappings between instructions //===----------------------------------------------------------------------===// @@ -120,7 +132,8 @@ let ValueCols = [["20"]]; } -// Return the memory form of a register instruction. +// Return the memory form of a register instruction. Note that this may +// return a MemFoldPseudo instruction (see below). def getMemOpcode : InstrMapping { let FilterClass = "InstSystemZ"; let RowFields = ["OpKey"]; @@ -129,6 +142,15 @@ let ValueCols = [["mem"]]; } +// Return the target memory instruction for a MemFoldPseudo. +def getTargetMemOpcode : InstrMapping { + let FilterClass = "InstSystemZ"; + let RowFields = ["MemKey"]; + let ColFields = ["MemType"]; + let KeyCol = ["pseudo"]; + let ValueCols = [["target"]]; +} + // Return the 2-operand form of a 3-operand instruction. 
def getTwoOperandOpcode : InstrMapping { let FilterClass = "InstSystemZ"; @@ -3279,20 +3301,26 @@ let mayLoad = 1; } -class BinaryRX opcode, SDPatternOperator operator, +class BinaryRX_base opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> : InstRXa { - let OpKey = mnemonic#"r"#cls; - let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; let AccessBytes = bytes; } +class BinaryRX opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr12only> + : BinaryRX_base { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; +} + class BinaryRXE opcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> : InstRXE opcode, SDPatternOperator operator, - RegisterOperand cls, SDPatternOperator load, bits<5> bytes, - AddressingMode mode = bdxaddr20only> +class BinaryRXY_base opcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> : InstRXYa { - let OpKey = mnemonic#"r"#cls; - let OpType = "mem"; let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; let mayLoad = 1; let AccessBytes = bytes; } +class BinaryRXY opcode, SDPatternOperator operator, + RegisterOperand cls, SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> + : BinaryRXY_base { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; +} + +// A pseudo that is used during register allocation when folding a memory +// operand. The 3-address register instruction with a spilled source cannot +// be converted directly to a target 2-address reg/mem instruction. 
+// Mapping: <INSN>R -> MemFoldPseudo -> <INSN> +class MemFoldPseudo bytes, + AddressingMode mode> + : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> { + let OpKey = mnemonic#"r"#cls; + let OpType = "mem"; + let MemKey = mnemonic#cls; + let MemType = "pseudo"; + let mayLoad = 1; + let AccessBytes = bytes; + let HasIndex = 1; + let hasNoSchedulingInfo = 1; +} + +multiclass BinaryRXYAndPseudo opcode, + SDPatternOperator operator, RegisterOperand cls, + SDPatternOperator load, bits<5> bytes, + AddressingMode mode = bdxaddr20only> { + + def "" : BinaryRXY_base { + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let Has20BitOffset = 1 in + def _MemFoldPseudo : MemFoldPseudo; +} + multiclass BinaryRXPair rxOpcode, bits<16> rxyOpcode, SDPatternOperator operator, RegisterOperand cls, SDPatternOperator load, bits<5> bytes> { @@ -3347,6 +3412,24 @@ } } +multiclass BinaryRXPairAndPseudo rxOpcode, + bits<16> rxyOpcode, SDPatternOperator operator, + RegisterOperand cls, + SDPatternOperator load, bits<5> bytes> { + let DispKey = mnemonic ## #cls in { + def "" : BinaryRX_base { + let DispSize = "12"; + let MemKey = mnemonic#cls; + let MemType = "target"; + } + let DispSize = "20" in + def Y : BinaryRXY_base; + } + def _MemFoldPseudo : MemFoldPseudo; +} + class BinarySI opcode, SDPatternOperator operator, Operand imm, AddressingMode mode = bdaddr12only> : InstSI pattern> - : InstSystemZ<0, outs, ins, "", pattern> { - let isPseudo = 1; - let isCodeGenOnly = 1; -} - // Like UnaryRI, but expanded after RA depending on the choice of register. 
class UnaryRIPseudo Index: lib/Target/SystemZ/SystemZInstrInfo.h =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.h +++ lib/Target/SystemZ/SystemZInstrInfo.h @@ -143,6 +143,7 @@ namespace SystemZ { int getTwoOperandOpcode(uint16_t Opcode); +int getTargetMemOpcode(uint16_t Opcode); } class SystemZInstrInfo : public SystemZGenInstrInfo { @@ -252,7 +253,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; MachineInstr *foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, Index: lib/Target/SystemZ/SystemZInstrInfo.cpp =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.cpp +++ lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -1009,7 +1009,7 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS) const { + LiveIntervals *LIS, VirtRegMap *VRM) const { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo(); unsigned Size = MFI.getObjectSize(FrameIndex); @@ -1163,12 +1163,39 @@ } } - // If the spilled operand is the final one, try to change <INSN>R - // into <INSN>. - int MemOpcode = SystemZ::getMemOpcode(Opcode); + // If the spilled operand is the final one or the instruction is + // commutable, try to change <INSN>R into <INSN>. + unsigned NumOps = MI.getNumExplicitOperands(); + int MemOpcode = -1; + + // See if this is a 3-address instruction that is convertible to 2-address + // and suitable for folding below. Only try this with virtual registers + // and a provided VRM (during regalloc). 
+ bool NeedsCommute = false; + int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(Opcode); + int MemFoldPseudoOpc = + (TwoOperandOpcode == -1 ? -1 : SystemZ::getMemOpcode(TwoOperandOpcode)); + if (VRM != nullptr && MemFoldPseudoOpc != -1) { + assert(NumOps == 3 && "Expected two source registers."); + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned DstPhys = + (TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg); + if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys)) { + unsigned SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg() + : ((OpNum == 1 && MI.isCommutable()) + ? MI.getOperand(2).getReg() + : 0)); + if (SrcReg && TRI->isVirtualRegister(SrcReg) && + DstPhys == VRM->getPhys(SrcReg)) { + MemOpcode = MemFoldPseudoOpc; + NeedsCommute = (OpNum == 1); + } + } + } else + MemOpcode = SystemZ::getMemOpcode(Opcode); + if (MemOpcode >= 0) { - unsigned NumOps = MI.getNumExplicitOperands(); - if (OpNum == NumOps - 1) { + if ((OpNum == NumOps - 1) || NeedsCommute) { const MCInstrDesc &MemDesc = get(MemOpcode); uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); assert(AccessBytes != 0 && "Size of access should be known"); @@ -1176,8 +1203,12 @@ uint64_t Offset = Size - AccessBytes; MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, MI.getDebugLoc(), get(MemOpcode)); - for (unsigned I = 0; I < OpNum; ++I) - MIB.add(MI.getOperand(I)); + MIB.add(MI.getOperand(0)); + if (NeedsCommute) + MIB.add(MI.getOperand(2)); + else + for (unsigned I = 1; I < OpNum; ++I) + MIB.add(MI.getOperand(I)); MIB.addFrameIndex(FrameIndex).addImm(Offset); if (MemDesc.TSFlags & SystemZII::HasIndex) MIB.addReg(0); Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -916,11 +916,11 @@ // Addition of memory. 
defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>; - defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; + defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>; - def AG : BinaryRXY<"ag", 0xE308, z_sadd, GR64, load, 8>; + defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>; // Addition to memory. def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; @@ -958,9 +958,9 @@ Requires<[FeatureHighWord]>; // Addition of memory. - defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; + defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>; - def ALG : BinaryRXY<"alg", 0xE30A, z_uadd, GR64, load, 8>; + defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>; // Addition to memory. def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>; @@ -1003,11 +1003,11 @@ // Subtraction of memory. defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>; - defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; + defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>, Requires<[FeatureMiscellaneousExtensions2]>; def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>; - def SG : BinaryRXY<"sg", 0xE309, z_ssub, GR64, load, 8>; + defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>; } defm : SXB; @@ -1055,9 +1055,9 @@ def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>; // Subtraction of memory. 
- defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; + defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>; - def SLG : BinaryRXY<"slg", 0xE30B, z_usub, GR64, load, 8>; + defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>; } defm : ZXB; @@ -1132,8 +1132,8 @@ // ANDs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; - def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; + defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>; + defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>; } // AND to memory @@ -1189,8 +1189,8 @@ // ORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; - def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; + defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>; + defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>; } // OR to memory @@ -1229,8 +1229,8 @@ // XORs of memory. let CCValues = 0xC, CompareZeroCCMask = 0x8 in { - defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; - def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; + defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>; + defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>; } // XOR to memory Index: lib/Target/SystemZ/SystemZPostRewrite.cpp =================================================================== --- /dev/null +++ lib/Target/SystemZ/SystemZPostRewrite.cpp @@ -0,0 +1,124 @@ +//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that is run immediately after VirtRegRewriter +// but before MachineCopyPropagation. The purpose is to lower pseudos to +// target instructions before any later pass might substitute a register for +// another. +// +//===----------------------------------------------------------------------===// + +#include "SystemZ.h" +#include "SystemZInstrInfo.h" +#include "SystemZSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +using namespace llvm; + +#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass" + +#define DEBUG_TYPE "systemz-postrewrite" +STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops."); + +namespace llvm { + void initializeSystemZPostRewritePass(PassRegistry&); +} + +namespace { + +class SystemZPostRewrite : public MachineFunctionPass { +public: + static char ID; + SystemZPostRewrite() : MachineFunctionPass(ID) { + initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry()); + } + + const SystemZInstrInfo *TII; + + bool runOnMachineFunction(MachineFunction &Fn) override; + + StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + +private: + bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool selectMBB(MachineBasicBlock &MBB); +}; + +char SystemZPostRewrite::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite", + SYSTEMZ_POSTREWRITE_NAME, false, false) + +/// Returns an instance of the Post Rewrite pass. 
+FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) { + return new SystemZPostRewrite(); +} + +/// If MBBI references a pseudo instruction that should be selected here, +/// do it and return true. Otherwise return false. +bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + + // Note: If this could be done during regalloc in foldMemoryOperandImpl() + // while also updating the LiveIntervals, there would be no need for the + // MemFoldPseudo to begin with. + int TargetMemOpcode = SystemZ::getTargetMemOpcode(Opcode); + if (TargetMemOpcode != -1) { + MI.setDesc(TII->get(TargetMemOpcode)); + MI.tieOperands(0, 1); + unsigned DstReg = MI.getOperand(0).getReg(); + MachineOperand &SrcMO = MI.getOperand(1); + if (DstReg != SrcMO.getReg()) { + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), DstReg) + .addReg(SrcMO.getReg()); + SrcMO.setReg(DstReg); + MemFoldCopies++; + } + return true; + } + + return false; +} + +/// Iterate over the instructions in basic block MBB and select any +/// pseudo instructions. Return true if anything was modified. 
+bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= selectMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= selectMBB(MBB); + + return Modified; +} + Index: lib/Target/SystemZ/SystemZTargetMachine.cpp =================================================================== --- lib/Target/SystemZ/SystemZTargetMachine.cpp +++ lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -183,6 +183,7 @@ void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; + void addPostRewrite() override; void addPreSched2() override; void addPreEmitPass() override; }; @@ -212,7 +213,16 @@ return true; } +void SystemZPassConfig::addPostRewrite() { + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); +} + void SystemZPassConfig::addPreSched2() { + // PostRewrite needs to be run at -O0 also (in which case addPostRewrite() + // is not called). 
+ if (getOptLevel() == CodeGenOpt::None) + addPass(createSystemZPostRewritePass(getSystemZTargetMachine())); + addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); if (getOptLevel() != CodeGenOpt::None) Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -350,7 +350,8 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, - LiveIntervals *LIS = nullptr) const override; + LiveIntervals *LIS = nullptr, + VirtRegMap *VRM = nullptr) const override; /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -4773,7 +4773,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef Ops, MachineBasicBlock::iterator InsertPt, - int FrameIndex, LiveIntervals *LIS) const { + int FrameIndex, LiveIntervals *LIS, + VirtRegMap *VRM) const { // Check switch flag if (NoFusing) return nullptr; Index: test/CodeGen/SystemZ/and-01.ll =================================================================== --- test/CodeGen/SystemZ/and-01.ll +++ test/CodeGen/SystemZ/and-01.ll @@ -135,8 +135,7 @@ define i32 @f12(i32 *%ptr0) { ; CHECK-LABEL: f12: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. 
-; _CHECK: n %r2, 16{{[04]}}(%r15) +; CHECK: n %r2, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/and-03.ll =================================================================== --- test/CodeGen/SystemZ/and-03.ll +++ test/CodeGen/SystemZ/and-03.ll @@ -100,8 +100,7 @@ define i64 @f9(i64 *%ptr0) { ; CHECK-LABEL: f9: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: ng %r2, 160(%r15) +; CHECK: ng %r2, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/int-add-02.ll =================================================================== --- test/CodeGen/SystemZ/int-add-02.ll +++ test/CodeGen/SystemZ/int-add-02.ll @@ -135,8 +135,7 @@ define i32 @f12(i32 *%ptr0) { ; CHECK-LABEL: f12: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: a %r2, 16{{[04]}}(%r15) +; CHECK: a %r2, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/int-add-05.ll =================================================================== --- test/CodeGen/SystemZ/int-add-05.ll +++ test/CodeGen/SystemZ/int-add-05.ll @@ -102,8 +102,7 @@ ; CHECK-LABEL: f9: ; CHECK: brasl %r14, foo@PLT ; Z10: ag %r2, 168(%r15) -; TODO: This test is temporarily failing due to D60888. 
-; Z196_: ag %r0, 168(%r15) +; Z196: ag %r0, 168(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/int-add-08.ll =================================================================== --- test/CodeGen/SystemZ/int-add-08.ll +++ test/CodeGen/SystemZ/int-add-08.ll @@ -116,8 +116,7 @@ define void @f8(i128 *%ptr0) { ; CHECK-LABEL: f8: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: alg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: alg {{%r[0-9]+}}, {{[0-9]+}}(%r15) ; CHECK: alcg {{%r[0-9]+}}, {{[0-9]+}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i128, i128 *%ptr0, i128 2 Index: test/CodeGen/SystemZ/int-sub-01.ll =================================================================== --- test/CodeGen/SystemZ/int-sub-01.ll +++ test/CodeGen/SystemZ/int-sub-01.ll @@ -135,8 +135,7 @@ define i32 @f12(i32 *%ptr0) { ; CHECK-LABEL: f12: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: s %r2, 16{{[04]}}(%r15) +; CHECK: s %r2, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/int-sub-04.ll =================================================================== --- test/CodeGen/SystemZ/int-sub-04.ll +++ test/CodeGen/SystemZ/int-sub-04.ll @@ -100,8 +100,7 @@ define i64 @f9(i64 *%ptr0) { ; CHECK-LABEL: f9: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. 
-; CHECK_: sg %r2, 160(%r15) +; CHECK: sg %r2, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/int-sub-05.ll =================================================================== --- test/CodeGen/SystemZ/int-sub-05.ll +++ test/CodeGen/SystemZ/int-sub-05.ll @@ -125,8 +125,7 @@ define void @f8(i128 *%ptr0) { ; CHECK-LABEL: f8: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: slg {{%r[0-9]+}}, {{[0-9]+}}(%r15) +; CHECK: slg {{%r[0-9]+}}, {{[0-9]+}}(%r15) ; CHECK: slbg {{%r[0-9]+}}, {{[0-9]+}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i128, i128 *%ptr0, i128 2 Index: test/CodeGen/SystemZ/or-01.ll =================================================================== --- test/CodeGen/SystemZ/or-01.ll +++ test/CodeGen/SystemZ/or-01.ll @@ -135,8 +135,7 @@ define i32 @f12(i32 *%ptr0) { ; CHECK-LABEL: f12: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: o %r2, 16{{[04]}}(%r15) +; CHECK: o %r2, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/or-03.ll =================================================================== --- test/CodeGen/SystemZ/or-03.ll +++ test/CodeGen/SystemZ/or-03.ll @@ -100,8 +100,7 @@ define i64 @f9(i64 *%ptr0) { ; CHECK-LABEL: f9: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. 
-; CHECK_: og %r2, 160(%r15) +; CHECK: og %r2, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/xor-01.ll =================================================================== --- test/CodeGen/SystemZ/xor-01.ll +++ test/CodeGen/SystemZ/xor-01.ll @@ -135,8 +135,7 @@ define i32 @f12(i32 *%ptr0) { ; CHECK-LABEL: f12: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: x %r2, 16{{[04]}}(%r15) +; CHECK: x %r2, 16{{[04]}}(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 Index: test/CodeGen/SystemZ/xor-03.ll =================================================================== --- test/CodeGen/SystemZ/xor-03.ll +++ test/CodeGen/SystemZ/xor-03.ll @@ -100,8 +100,7 @@ define i64 @f9(i64 *%ptr0) { ; CHECK-LABEL: f9: ; CHECK: brasl %r14, foo@PLT -; TODO: This test is temporarily failing due to D60888. -; CHECK_: xg %r2, 160(%r15) +; CHECK: xg %r2, 160(%r15) ; CHECK: br %r14 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4