Index: include/llvm/CodeGen/CalcSpillWeights.h
===================================================================
--- include/llvm/CodeGen/CalcSpillWeights.h
+++ include/llvm/CodeGen/CalcSpillWeights.h
@@ -67,6 +67,10 @@
     /// \brief (re)compute li's spill weight and allocation hint.
     void calculateSpillWeightAndHint(LiveInterval &li);
+
+    /// \brief Return the preferred allocation register for reg, given a COPY
+    /// instruction.
+    static unsigned copyHint(const MachineInstr *, unsigned,
+                             const TargetRegisterInfo &,
+                             const MachineRegisterInfo &);
   };

   /// \brief Compute spill weights and allocation hints for all virtual register
Index: lib/CodeGen/CalcSpillWeights.cpp
===================================================================
--- lib/CodeGen/CalcSpillWeights.cpp
+++ lib/CodeGen/CalcSpillWeights.cpp
@@ -43,9 +43,9 @@
 }

 // Return the preferred allocation register for reg, given a COPY instruction.
-static unsigned copyHint(const MachineInstr *mi, unsigned reg,
-                         const TargetRegisterInfo &tri,
-                         const MachineRegisterInfo &mri) {
+unsigned VirtRegAuxInfo::copyHint(const MachineInstr *mi, unsigned reg,
+                                  const TargetRegisterInfo &tri,
+                                  const MachineRegisterInfo &mri) {
   unsigned sub, hreg, hsub;
   if (mi->getOperand(0).getReg() == reg) {
     sub = mi->getOperand(0).getSubReg();
Index: lib/Target/X86/CMakeLists.txt
===================================================================
--- lib/Target/X86/CMakeLists.txt
+++ lib/Target/X86/CMakeLists.txt
@@ -20,6 +20,7 @@
   X86FixupBWInsts.cpp
   X86FixupLEAs.cpp
   X86FixupSetCC.cpp
+  X86FixupZExt.cpp
   X86FloatingPoint.cpp
   X86FrameLowering.cpp
   X86ISelDAGToDAG.cpp
Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -62,6 +62,8 @@
 /// Return a pass that transforms setcc + movzx pairs into xor + setcc.
 FunctionPass *createX86FixupSetCC();

+/// Return a pass that rewrites zero-extends as zero idioms after register
+/// allocation, where the register assignments permit it.
+FunctionPass *createX86FixupZExt();
+
 /// Return a pass that expands WinAlloca pseudo-instructions.
 FunctionPass *createX86WinAllocaExpander();

Index: lib/Target/X86/X86FixupZExt.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/X86FixupZExt.cpp
@@ -0,0 +1,688 @@
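+//===--- X86FixupZExt.cpp - turn zero-extends into zero idioms -----------===//
+//
+// Overview (summarizing the code below): after register allocation, look
+// for patterns of the form
+//
+//   %gr8 = ...                      ; e.g. a setcc
+//   %gr32 = MOVZX32rr8 %gr8
+//
+// and, when a suitable register assignment can be found (possibly by
+// re-allocating or evicting neighboring live intervals), rewrite them as
+//
+//   %gr32 = MOV32r0                 ; hoisted to a point where EFLAGS is dead
+//   %gr32:sub_8bit = ...
+//
+// i.e. zero the full register up front and define the 8-bit value directly
+// into its low byte, making the movzx redundant.
+//
+//===----------------------------------------------------------------------===//
+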
+#include "X86Subtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#define DEBUG_TYPE "x86-fixup-zext"
+
+namespace {
+using namespace llvm;
+using std::unique_ptr;
+using std::vector;
+using std::pair;
+using std::copy_if;
+using Segment = LiveRange::Segment;
+
+// SFINAE helper: enabled iff C is iterable and its elements are Elem.
+template <typename C, typename Elem>
+using is_iterable_of = typename std::enable_if<std::is_same<
+    typename std::decay<decltype(*std::declval<C>().begin())>::type,
+    Elem>::value>::type;
+
+unsigned getPhys(unsigned reg, const VirtRegMap &vrm) {
+  return TargetRegisterInfo::isVirtualRegister(reg) ? vrm.getPhys(reg) : reg;
+}
+
+unsigned getPhys(const MachineOperand &regop, const VirtRegMap &vrm) {
+  const auto *f = regop.getParent()->getParent()->getParent();
+  const auto &tri = *f->getSubtarget().getRegisterInfo();
+  assert(regop.isReg());
+  unsigned preg = getPhys(regop.getReg(), vrm);
+  return regop.getSubReg() ? tri.getSubReg(preg, regop.getSubReg()) : preg;
+}
+
+unsigned getPhys(const MachineInstr &i, unsigned opnum, const VirtRegMap &vrm) {
+  return getPhys(i.getOperand(opnum), vrm);
+}
+
+void eraseInstr(MachineInstr &i, LiveIntervals &li) {
+  li.RemoveMachineInstrFromMaps(i);
+  i.eraseFromParent();
+}
+
+// Map each basic block to its earliest def of gr8, looking through tied
+// redefinitions into the underlying register.
+DenseMap<MachineBasicBlock *, MachineInstr *>
+dominatingDefs(unsigned gr8, const MachineRegisterInfo &mri,
+               const SlotIndexes &si) {
+  DenseMap<MachineBasicBlock *, MachineInstr *> defs;
+  // At least until release_37, getInstructionIndex is expensive, so cache
+  // the per-block minimum.
+  DenseMap<MachineBasicBlock *, SlotIndex> cached;
+
+  for (MachineInstr &def : mri.def_instructions(gr8)) {
+    unsigned tied_use;
+    if (def.isRegTiedToUseOperand(0, &tied_use) &&
+        def.getOperand(tied_use).getReg() != def.getOperand(0).getReg()) {
+      DEBUG(dbgs() << "dominatingDefs: " << def.getOperand(0) << " is tied to "
+                   << def.getOperand(tied_use) << "\n");
+      return dominatingDefs(def.getOperand(tied_use).getReg(), mri, si);
+    }
+    MachineBasicBlock *bb = def.getParent();
+    if (defs.find(bb) == defs.end() ||
+        si.getInstructionIndex(def) < cached.lookup(bb)) {
+      cached[bb] = si.getInstructionIndex(def);
+      defs[bb] = &def;
+    }
+  }
+  return defs;
+}
+
+void addSeg(SlotIndex s, SlotIndex e, LiveInterval &live, LiveIntervals &li) {
+  assert(live.vni_begin() != live.vni_end());
+  live.addSegment(Segment(std::move(s), std::move(e), *live.vni_begin()));
+}
+
+void addSeg(MachineInstr &s, MachineInstr &e, LiveInterval &live,
+            LiveIntervals &li) {
+  return addSeg(li.getInstructionIndex(s), li.getInstructionIndex(e), live, li);
+}
+
+void addSegs(LiveInterval &src, LiveInterval &dest, LiveIntervals &li) {
+  for (const Segment &s : src) {
+    addSeg(s.start, s.end, dest, li);
+  }
+}
+
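+// MOV32r0 is lowered to "xor r, r" and therefore clobbers EFLAGS, so it may
+// only be inserted where EFLAGS is dead.  Working backwards from def8,
+// record (insertion point, end) pairs covering the range the zero must live
+// through; if EFLAGS is live across the whole block, hop into the single
+// predecessor and keep looking.  Returns false when no safe placement
+// exists (i.e. the block has more than one predecessor).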
+bool mov32r0Segs(MachineInstr &def8,
+                 SmallVectorImpl<pair<MachineBasicBlock::iterator,
+                                      MachineInstr *>> &segs,
+                 LiveIntervals &li) {
+  const MachineFunction &f = *def8.getParent()->getParent();
+  const auto *tri = f.getSubtarget().getRegisterInfo();
+  MachineBasicBlock &bb = *def8.getParent();
+  MachineBasicBlock::iterator ins = &def8;
+
+  if (const Segment *eflagseg =
+          li.getRegUnit(*MCRegUnitIterator(X86::EFLAGS, tri))
+              .getSegmentContaining(li.getInstructionIndex(def8))) {
+    if (eflagseg->start <= li.getInstructionIndex(*bb.begin()) &&
+        bb.isLiveIn(X86::EFLAGS)) {
+      if (bb.pred_size() > 1) {
+        return false;
+      }
+      segs.push_back(std::make_pair(bb.begin(), &def8));
+      return mov32r0Segs(*(*bb.pred_begin())->rbegin(), segs, li);
+    }
+    ins = li.getInstructionFromIndex(eflagseg->start);
+  }
+  segs.push_back(std::make_pair(ins, &def8));
+  return true;
+}
+
+template <typename T, typename = is_iterable_of<T, LiveInterval *>>
+raw_ostream &operator<<(raw_ostream &out, const T &es) {
+  for (LiveInterval *e : es) {
+    out << "\t" << (*e) << "\n";
+  }
+  return out;
+}
+
+template <typename T, typename = is_iterable_of<T, LiveInterval *>>
+bool interferes(const T &as, const LiveInterval &b,
+                const MachineRegisterInfo &mri) {
+  return any_of(as, [&](const LiveInterval *a) { return a->overlaps(b); });
+}
+
+struct ReAllocTool {
+  const TargetRegisterInfo *tri;
+  const MachineRegisterInfo *mri;
+  LiveRegMatrix *lrm;
+  VirtRegMap *vrm;
+  RegisterClassInfo rci;
+  BitVector unused_csr;
+
+  void addRegToBitVec(BitVector &bv, MCPhysReg reg) const {
+    for (MCRegAliasIterator r(reg, tri, true); r.isValid(); ++r) {
+      bv.set(*r);
+    }
+  }
+
+  BitVector bitVecFromRegs(ArrayRef<MCPhysReg> regs) const {
+    BitVector rv(tri->getNumRegs());
+    for (const MCPhysReg &r : regs) {
+      addRegToBitVec(rv, r);
+    }
+    return rv;
+  }
+
+  ReAllocTool(const MachineFunction &f, LiveRegMatrix &lrm_, VirtRegMap &vrm_)
+      : tri(f.getSubtarget().getRegisterInfo()), mri(&f.getRegInfo()),
+        lrm(&lrm_), vrm(&vrm_), rci(), unused_csr(tri->getNumRegs()) {
+    const MCPhysReg *csr = tri->getCalleeSavedRegs(&f);
+    for (unsigned i = 0; csr[i] != 0; i += 1) {
+      if (!lrm->isPhysRegUsed(csr[i])) {
+        addRegToBitVec(unused_csr, csr[i]);
+      }
+    }
+    rci.runOnMachineFunction(f);
+  }
+
+  bool interf(LiveInterval &live, unsigned preg) const {
+    return lrm->checkInterference(live, preg) != LiveRegMatrix::IK_Free;
+  }
+
+  // Like the overload above, but collect the interfering virtual intervals
+  // (candidates for eviction) into evictees.
+  template <typename T, typename = is_iterable_of<T, LiveInterval *>>
+  bool interf(LiveInterval &live, unsigned preg, T &evictees) const {
+    if (lrm->checkRegMaskInterference(live, preg) ||
+        lrm->checkRegUnitInterference(live, preg)) {
+      return true;
+    }
+    DenseSet<LiveInterval *> ev;
+    for (MCRegUnitIterator regunit(preg, tri); regunit.isValid(); ++regunit) {
+      LiveIntervalUnion::Query &q = lrm->query(live, *regunit);
+      if (q.collectInterferingVRegs() > 0) {
+        for (LiveInterval *l : q.interferingVRegs()) {
+          ev.insert(l);
+        }
+      }
+    }
+    std::copy(ev.begin(), ev.end(), back_inserter(evictees));
+    return evictees.size() > 0;
+  }
+
+  // Return a pointer into the allocation order at the first physreg that is
+  // free for live, or nullptr if there is none.  If it is given, resume the
+  // scan just after *it.
+  const MCPhysReg *allocNext(LiveInterval &live,
+                             const BitVector *except = nullptr,
+                             ArrayRef<MCPhysReg>::iterator *it = nullptr,
+                             const TargetRegisterClass *rc = nullptr) const {
+    ArrayRef<MCPhysReg> ord =
+        rci.getOrder(rc ? rc : mri->getRegClass(live.reg));
+    BitVector rs = unused_csr;
+    if (except != nullptr) {
+      rs |= *except;
+    }
+    auto rv = std::find_if(
+        it ? std::next(*it) : ord.begin(), ord.end(),
+        [&](MCPhysReg r) { return !rs.test(r) && !interf(live, r); });
+    return rv == ord.end() ? nullptr : rv;
+  }
+
+  MCPhysReg alloc(LiveInterval &live, const BitVector *except = nullptr,
+                  const TargetRegisterClass *rc = nullptr) const {
+    const MCPhysReg *rv = allocNext(live, except, nullptr, rc);
+    return rv == nullptr ? 0 : *rv;
+  }
+
+  // (Re-)allocate a group of mutually interfering intervals by brute-force
+  // backtracking over the allocation order.  Returns nullptr if no
+  // conflict-free assignment exists.
+  template <typename C, typename = is_iterable_of<C, LiveInterval *>>
+  unique_ptr<vector<pair<LiveInterval *, const MCPhysReg *>>>
+  allocInterfIntervals(C group, const BitVector *except = nullptr) const {
+    if (group.empty()) {
+      return make_unique<vector<pair<LiveInterval *, const MCPhysReg *>>>();
+    }
+    auto assigned =
+        make_unique<vector<pair<LiveInterval *, const MCPhysReg *>>>();
+
+    auto maybeUnassign = [&](pair<LiveInterval *, const MCPhysReg *> &p) {
+      if (p.second) {
+        lrm->unassign(*p.first);
+      }
+    };
+
+    auto maybeAssign = [&](pair<LiveInterval *, const MCPhysReg *> &p) {
+      if (p.second) {
+        lrm->assign(*p.first, *p.second);
+      }
+    };
+
+    auto tryNextInGroup = [&]() {
+      assert(!group.empty());
+      assigned->push_back(
+          std::make_pair(group.back(), allocNext(*group.back(), except)));
+      group.pop_back();
+      maybeAssign(assigned->back());
+    };
+
+    auto backToPrevious = [&]() {
+      assert(!assigned->empty());
+      maybeUnassign(assigned->back());
+      group.push_back(assigned->back().first);
+      assigned->pop_back();
+    };
+
+    auto tryNextReg = [&]() {
+      assert(!assigned->empty());
+      maybeUnassign(assigned->back());
+      assigned->back().second =
+          allocNext(*assigned->back().first, except, &assigned->back().second);
+      maybeAssign(assigned->back());
+    };
+
+    tryNextInGroup();
+
+    while (!group.empty() || assigned->back().second == nullptr) {
+      if (assigned->back().second == nullptr) {
+        backToPrevious();
+        if (assigned->empty()) {
+          return nullptr;
+        }
+        tryNextReg();
+      } else {
+        tryNextInGroup();
+      }
+    }
+    for (auto &p : *assigned) {
+      lrm->unassign(*p.first);
+    }
+    return assigned;
+  }
+
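+  // Partition lives into groups of transitively interfering intervals and
+  // re-allocate each group independently with allocInterfIntervals.
+  // Returns the new physical registers (parallel to lives), or nullptr if
+  // any group cannot be re-allocated.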
+  template <typename C, typename = is_iterable_of<C, LiveInterval *>>
+  unique_ptr<vector<MCPhysReg>>
+  evictIntervals(const C &lives, const BitVector *excepts = nullptr) const {
+    DenseMap<LiveInterval *, const MCPhysReg *> newmap;
+    vector<LiveInterval *> ungrouped(lives.begin(), lives.end());
+
+    while (!ungrouped.empty()) {
+      vector<LiveInterval *> group;
+      group.push_back(ungrouped.back());
+      ungrouped.pop_back();
+      bool done = false;
+      while (!done) {
+        auto it = std::partition(
+            ungrouped.begin(), ungrouped.end(),
+            [&](LiveInterval *h) { return !interferes(group, *h, *mri); });
+        done = it == ungrouped.end();
+        std::copy(it, ungrouped.end(), back_inserter(group));
+        ungrouped.erase(it, ungrouped.end());
+      }
+      if (auto newassigns = allocInterfIntervals(group, excepts)) {
+        for (auto pair_ : *newassigns) {
+          newmap.insert(pair_);
+        }
+      } else {
+        return nullptr;
+      }
+    }
+    auto rv = make_unique<vector<MCPhysReg>>();
+    transform(lives, back_inserter(*rv),
+              [&](LiveInterval *l) { return *newmap[l]; });
+    return rv;
+  }
+
+  MCPhysReg unassign(LiveInterval &live) {
+    unsigned old = getPhys(live.reg, *vrm);
+    lrm->unassign(live);
+    return old;
+  }
+
+  template <typename C, typename = is_iterable_of<C, LiveInterval *>>
+  vector<MCPhysReg> unassignAll(C &lives) {
+    vector<MCPhysReg> r;
+    transform(lives, back_inserter(r),
+              [&](LiveInterval *l) { return unassign(*l); });
+    return r;
+  }
+
+  template <typename C, typename D,
+            typename = is_iterable_of<C, LiveInterval *>,
+            typename = is_iterable_of<D, MCPhysReg>>
+  void assignAll(C &lives, D &&regs) {
+    for (auto intv_reg : zip_first(lives, std::forward<D>(regs))) {
+      lrm->assign(*std::get<0>(intv_reg), std::get<1>(intv_reg));
+    }
+  }
+
+  // Try to free preg for live, evicting and re-allocating other intervals
+  // if necessary.  Returns true if preg ends up free.
+  bool reservePhysReg(MCPhysReg preg, LiveInterval &live) {
+    vector<LiveInterval *> evictees;
+    if (!interf(live, preg, evictees)) {
+      DEBUG(dbgs() << "ReAllocTool: " << tri->getName(preg)
+                   << " is already free.\n");
+      return true;
+    } else if (evictees.size() > 0) {
+      DEBUG(dbgs() << "ReAllocTool: trying to reserve " << tri->getName(preg)
+                   << " by evicting:\n"
+                   << evictees);
+      vector<MCPhysReg> oldregs = unassignAll(evictees);
+      BitVector bv = bitVecFromRegs(preg);
+      if (auto newregs = evictIntervals(evictees, &bv)) {
+        assignAll(evictees, *newregs);
+        return true;
+      }
+      assignAll(evictees, oldregs);
+    }
+    DEBUG(dbgs() << "ReAllocTool: unable to reserve " << tri->getName(preg)
+                 << "\n");
+    return false;
+  }
+};
+
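+// A rewritable zero-extend: the MOVZX32rr8 itself, the dominating def of
+// its gr8 source, and a freshly inserted (still unassigned) MOV32r0.  The
+// scratch interval "extra" covers the combined live range so interference
+// for the whole transformed region can be queried at once.  "constraints"
+// holds physical registers the gr32 destination would prefer, taken from
+// copy hints or from registers that survive regmask clobbers.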
+struct Candidate {
+  MachineInstr *ins;
+  MachineInstr *gr8def;
+  MachineInstr *movzx;
+  LiveIntervals *li;
+  LiveInterval *live32;
+  LiveInterval *live8;
+  unique_ptr<LiveInterval> extra;
+  vector<MCPhysReg> constraints;
+
+private:
+  unsigned pdest;
+  unsigned psrc;
+
+public:
+  // Accept only a MOVZX32rr8 whose source has a single dominating def and
+  // no pre-assigned physical hint; returns that def.
+  static MachineInstr *validCandidate(MachineInstr &i, LiveIntervals &li) {
+    if (i.getOpcode() != X86::MOVZX32rr8 || i.getOperand(1).getSubReg() != 0) {
+      return nullptr;
+    }
+
+    const MachineFunction &f = *i.getParent()->getParent();
+    const MachineRegisterInfo &mri = f.getRegInfo();
+    const TargetRegisterInfo &tri = *f.getSubtarget().getRegisterInfo();
+
+    unsigned src = i.getOperand(1).getReg();
+    auto bbdefs = dominatingDefs(src, mri, *li.getSlotIndexes());
+    if (bbdefs.size() > 1 || (mri.getSimpleHint(src) &&
+                              !tri.isVirtualRegister(mri.getSimpleHint(src)))) {
+      DEBUG(dbgs() << "passing over " << i << "defs: " << bbdefs.size()
+                   << ", gr8 hint: " << PrintReg(mri.getSimpleHint(src), &tri)
+                   << "\n");
+      return nullptr;
+    }
+    return bbdefs.begin()->second;
+  }
+
+  Candidate(const SmallVectorImpl<pair<MachineBasicBlock::iterator,
+                                       MachineInstr *>> &segs,
+            MachineInstr &gr8, MachineInstr &movzx_,
+            const TargetRegisterInfo &tri, LiveIntervals &li_)
+      : ins(nullptr), gr8def(&gr8), movzx(&movzx_), li(&li_),
+        live32(&li->getInterval(movzx->getOperand(0).getReg())),
+        live8(&li->getInterval(movzx->getOperand(1).getReg())),
+        extra(new LiveInterval(live32->reg, live32->weight)), constraints() {
+    MachineBasicBlock &bb = *segs.front().first->getParent();
+    const TargetInstrInfo &tii = *bb.getParent()->getSubtarget().getInstrInfo();
+    ins = BuildMI(bb, segs.front().first, movzx->getDebugLoc(),
+                  tii.get(X86::MOV32r0), 0);
+    li->InsertMachineInstrInMaps(*ins);
+    extra->getNextValue(li->getInstructionIndex(*ins),
+                        li->getVNInfoAllocator());
+    addSeg(*ins, *segs.front().second, *extra, *li);
+    for (auto p : make_range(std::next(segs.begin()), segs.end())) {
+      addSeg(*p.first, *p.second, *extra, *li);
+    }
+  }
+
+  ~Candidate() {
+    if (ins) {
+      eraseInstr(*ins, *li);
+    }
+  }
+
+  Candidate(Candidate &&c)
+      : ins(c.ins), gr8def(c.gr8def), movzx(c.movzx), li(c.li),
+        live32(c.live32), live8(c.live8), extra(std::move(c.extra)),
+        constraints(std::move(c.constraints)) {
+    c.ins = nullptr;
+  }
+
+  Candidate &operator=(Candidate &&c) {
+    ins = c.ins;
+    gr8def = c.gr8def;
+    movzx = c.movzx;
+    live32 = c.live32;
+    live8 = c.live8;
+    li = c.li;
+    extra = std::move(c.extra);
+    constraints = std::move(c.constraints);
+    c.ins = nullptr;
+    return *this;
+  }
+
+  static unique_ptr<Candidate> fromMI(MachineInstr &i, LiveIntervals &li,
+                                      const VirtRegMap &vrm) {
+    const MachineFunction &f = *i.getParent()->getParent();
+    const MachineRegisterInfo &mri = f.getRegInfo();
+    const TargetRegisterInfo &tri = *f.getSubtarget().getRegisterInfo();
+
+    MachineInstr *def;
+    SmallVector<pair<MachineBasicBlock::iterator, MachineInstr *>, 4> segs;
+    if ((def = validCandidate(i, li)) == nullptr ||
+        !mov32r0Segs(*def, segs, li)) {
+      return nullptr;
+    }
+
+    Candidate c(segs, *def, i, tri, li);
+    if (c.live32->overlaps(*c.extra)) {
+      return nullptr;
+    }
+
+    addSegs(*c.live32, *c.extra, li);
+    addSegs(*c.live8, *c.extra, li);
+
+    unsigned dest = i.getOperand(0).getReg();
+    // Look for reg alloc hints from COPY instructions.
+    for (const MachineInstr &use : mri.use_instructions(dest)) {
+      if (use.isCopy()) {
+        if (unsigned hint =
+                getPhys(VirtRegAuxInfo::copyHint(&use, dest, tri, mri), vrm)) {
+          c.constraints.push_back(hint);
+        }
+      }
+    }
+
+    // Fall back to regmask constraints if no hints were found.
+    if (c.constraints.empty()) {
+      BitVector regmask;
+      if (li.checkRegMaskInterference(*c.extra, regmask)) {
+        const TargetRegisterClass &destcls = *mri.getRegClass(dest);
+        copy_if(destcls.begin(), destcls.end(), back_inserter(c.constraints),
+                [&](MCPhysReg r) { return regmask.test(r); });
+      }
+    }
+    return make_unique<Candidate>(std::move(c));
+  }
+
+  // Sort constrained candidates first; among constrained candidates, those
+  // with fewer alternatives first; otherwise prefer larger live ranges.
+  bool operator<(const Candidate &b) const {
+    if (constraints.size() > 0 && b.constraints.size() == 0)
+      return true;
+    if (b.constraints.size() > 0 && constraints.size() == 0)
+      return false;
+    if (constraints.size() < b.constraints.size())
+      return true;
+    return li_size() > b.li_size();
+  }
+
+  unsigned li_size() const { return extra->getSize(); }
+
+  friend raw_ostream &operator<<(raw_ostream &out, const Candidate &c) {
+    out << "Candidate:\n\tinserted: " << (*c.ins)
+        << "\tgr8 def: " << (*c.gr8def) << "\tmovzx: " << (*c.movzx)
+        << "\txor gr32: " << (*c.extra);
+    if (c.constraints.size() > 0) {
+      out << "\n\tconstraints:";
+      for (unsigned cx : c.constraints) {
+        out << " " << PrintReg(cx, &c.tri());
+      }
+    } else {
+      out << "\n\tno constraints.";
+    }
+    return out;
+  }
+
+  const X86RegisterInfo &tri() const {
+    return *reinterpret_cast<const X86RegisterInfo *>(
+        ins->getParent()->getParent()->getSubtarget().getRegisterInfo());
+  }
+
+  const X86InstrInfo &tii() const {
+    return *reinterpret_cast<const X86InstrInfo *>(
+        ins->getParent()->getParent()->getSubtarget().getInstrInfo());
+  }
+
+  MachineRegisterInfo &mri() const {
+    return ins->getParent()->getParent()->getRegInfo();
+  }
+
+  void unassign(ReAllocTool &ratool) {
+    pdest = ratool.unassign(*live32);
+    psrc = ratool.unassign(*live8);
+  }
+
+  void assignOld(LiveRegMatrix &lrm) {
+    lrm.assign(*live32, pdest);
+    lrm.assign(*live8, psrc);
+    pdest = psrc = 0;
+  }
+
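+  // Commit the transformation: rewrite all uses of the gr8 vreg as
+  // vdest:sub_8bit, delete the movzx, point the inserted MOV32r0 at vdest
+  // (through sub_32bit for wider-than-32-bit classes), tighten the register
+  // class if the gr8 def demands it, then recompute vdest's interval and
+  // assign it to newdest.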
+  void assignNew(LiveRegMatrix &lrm, MCPhysReg newdest) {
+    // vsrc uses => vdest:sub_8bit; insert vdest = mov32r0; del movzx
+    unsigned vdest = movzx->getOperand(0).getReg();
+    unsigned vsrc = movzx->getOperand(1).getReg();
+
+    // In-place operand mutation would confuse defusechain_iterator, so
+    // collect the operands first.
+    vector<MachineOperand *> ops;
+    transform(mri().reg_operands(vsrc), back_inserter(ops),
+              [](MachineOperand &op) { return &op; });
+    for (MachineOperand *op : ops) {
+      DEBUG(dbgs() << "changing " << (*op->getParent()));
+      op->substVirtReg(vdest, X86::sub_8bit, tri());
+      DEBUG(dbgs() << "to " << (*op->getParent()));
+    }
+
+    eraseInstr(*movzx, *li);
+    li->removeInterval(vsrc);
+    li->removeInterval(vdest);
+
+    const TargetRegisterClass &destcls = *mri().getRegClass(vdest);
+    ins->getOperand(0).setReg(vdest);
+    if (destcls.getSize() > 32 / 8) {
+      ins->getOperand(0).setSubReg(X86::sub_32bit);
+      ins->getOperand(0).setIsUndef();
+    }
+    if (const TargetRegisterClass *newcls = gr8def->getRegClassConstraintEffect(
+            0, ins->getRegClassConstraintEffect(0, &destcls, &tii(), &tri()),
+            &tii(), &tri())) {
+      DEBUG(dbgs() << "updating reg class from "
+                   << tri().getRegClassName(&destcls) << " to "
+                   << tri().getRegClassName(newcls) << "\n");
+      mri().setRegClass(vdest, newcls);
+    } else {
+      DEBUG(dbgs() << "not updating reg class\n");
+    }
+    li->removeInterval(vdest);
+    lrm.assign(li->createAndComputeVirtRegInterval(vdest), newdest);
+    ins = nullptr; // prevent erasure of the mov32r0 by the dtor
+  }
+
+  bool validDestReg(MCPhysReg physreg) const {
+    return mri().getRegClass(movzx->getOperand(0).getReg())->contains(physreg);
+  }
+};
+
+struct X86FixupZExt : public MachineFunctionPass {
+  static char id;
+
+  X86FixupZExt() : MachineFunctionPass(id) {}
+
+  const char *getPassName() const override {
+    return "X86 Zero-Extension Fix-up";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &a) const override {
+    a.addRequired<LiveIntervals>();
+    a.addRequired<VirtRegMap>();
+    a.addRequired<LiveRegMatrix>();
+    a.setPreservesAll();
+    return MachineFunctionPass::getAnalysisUsage(a);
+  }
+
+  bool runOnMachineFunction(MachineFunction &f) override {
+    VirtRegMap &vrm = getAnalysis<VirtRegMap>();
+    LiveIntervals &li = getAnalysis<LiveIntervals>();
+    LiveRegMatrix &lrm = getAnalysis<LiveRegMatrix>();
+    vector<Candidate> constrained, cands;
+    ReAllocTool ratool(f, lrm, vrm);
+
+    DEBUG(dbgs() << "analyzing " << f.getName() << "'s movzxes.\n");
+    for (MachineBasicBlock &bb : f) {
+      for (MachineInstr &i : bb) {
+        if (auto cand = Candidate::fromMI(i, li, vrm)) {
+          if (cand->constraints.size() > 0) {
+            constrained.push_back(std::move(*cand));
+          } else {
+            cands.push_back(std::move(*cand));
+          }
+        }
+      }
+    }
+
+    // Registers whose low byte cannot be used: in 64-bit mode everything
+    // but RIP has a sub_8bit; in 32-bit mode only EAX/EBX/ECX/EDX do.
+    BitVector nosub8;
+    if (f.getSubtarget<X86Subtarget>().is64Bit()) {
+      nosub8 = ratool.bitVecFromRegs({X86::RIP});
+    } else {
+      nosub8 = ratool.bitVecFromRegs(ArrayRef<MCPhysReg>(
+          X86::GR32_ABCDRegClass.begin(), X86::GR32_ABCDRegClass.end()));
+      nosub8.flip();
+    }
+
+    auto reserveOneOf = [&](ArrayRef<MCPhysReg> regs, const Candidate &c) {
+      for (MCPhysReg preg : regs) {
+        if (!nosub8.test(preg) && c.validDestReg(preg) &&
+            !ratool.unused_csr.test(preg) &&
+            ratool.reservePhysReg(preg, *c.extra)) {
+          return preg;
+        }
+      }
+      return static_cast<MCPhysReg>(0);
+    };
+
+    DEBUG(vrm.print(dbgs()));
+    DEBUG(f.print(dbgs(), li.getSlotIndexes()));
+    std::sort(constrained.begin(), constrained.end());
+    for (Candidate &c : constrained) {
+      DEBUG(dbgs() << c << "\n");
+      c.unassign(ratool);
+      if (unsigned newreg = reserveOneOf(c.constraints, c)) {
+        DEBUG(dbgs() << "works\n");
+        c.assignNew(lrm, newreg);
+      } else {
+        c.assignOld(lrm);
+        if (none_of(c.constraints, [&](MCPhysReg r) {
+              return r == getPhys(*c.movzx, 0, vrm);
+            })) {
+          // Only demote if the RA pass missed all of the hints.
+          c.constraints.clear();
+          DEBUG(dbgs() << "demoting to unconstrained candidate\n");
+          cands.push_back(std::move(c));
+        } else {
+          DEBUG(dbgs() << "could not transform\n");
+        }
+      }
+    }
+
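+    // Unconstrained candidates: in 32-bit mode, first try a free register
+    // with a low byte (i.e. an ABCD register), then try to make one
+    // available by eviction; in 64-bit mode, any free register outside
+    // nosub8 will do.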
+    std::sort(cands.begin(), cands.end());
+    for (Candidate &c : cands) {
+      DEBUG(dbgs() << c << "\n");
+      c.unassign(ratool);
+      MCPhysReg newreg;
+      if (!f.getSubtarget<X86Subtarget>().is64Bit() &&
+          ((newreg = ratool.alloc(*c.extra, &nosub8)) != 0 ||
+           (newreg = reserveOneOf(
+                ArrayRef<MCPhysReg>(X86::GR32_ABCDRegClass.begin(),
+                                    X86::GR32_ABCDRegClass.end()),
+                c)) != 0)) {
+        DEBUG(dbgs() << "works\n");
+        c.assignNew(lrm, newreg);
+      } else if (f.getSubtarget<X86Subtarget>().is64Bit() &&
+                 (newreg = ratool.alloc(*c.extra, &nosub8)) != 0) {
+        DEBUG(dbgs() << "works\n");
+        c.assignNew(lrm, newreg);
+      } else {
+        DEBUG(dbgs() << "could not transform\n");
+        c.assignOld(lrm);
+      }
+    }
+    return false;
+  }
+};
+
+char X86FixupZExt::id = 0;
+}
+
+namespace llvm {
+FunctionPass *createX86FixupZExt() { return new X86FixupZExt(); }
+}
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//

-#include "X86TargetMachine.h"
 #include "X86.h"
+#include "X86TargetMachine.h"
 #include "X86TargetObjectFile.h"
 #include "X86TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
@@ -29,6 +29,10 @@
     cl::desc("Enable the machine combiner pass"),
     cl::init(true), cl::Hidden);

+static cl::opt<bool> EnableSetCCFixup("setcc-fixup",
+                                      cl::desc("Apply X86FixupSetCC"),
+                                      cl::init(false), cl::Hidden);
+
 namespace llvm {
 void initializeWinEHStatePassPass(PassRegistry &);
 }
@@ -238,7 +242,6 @@
   });
 }

-
 //===----------------------------------------------------------------------===//
 // Pass Pipeline Configuration
 //===----------------------------------------------------------------------===//
@@ -260,6 +263,7 @@
   bool addPreISel() override;
   void addPreRegAlloc() override;
   void addPostRegAlloc() override;
+  bool addPreRewrite() override;
   void addPreEmitPass() override;
   void addPreSched2() override;
 };
@@ -305,8 +309,10 @@

 void X86PassConfig::addPreRegAlloc() {
   if (getOptLevel() != CodeGenOpt::None) {
-    addPass(createX86FixupSetCC());
-    addPass(createX86OptimizeLEAs());
+    if (EnableSetCCFixup) {
+      addPass(createX86FixupSetCC());
+    }
+    addPass(createX86OptimizeLEAs());
   }

   addPass(createX86CallFrameOptimization());
@@ -317,6 +323,13 @@
     addPass(createX86FloatingPointStackifierPass());
 }

+bool X86PassConfig::addPreRewrite() {
+  // Run the zero-extend fixup while the virtual-register mapping is still
+  // available, unless the pre-RA setcc fixup is enabled instead.
+  if (!EnableSetCCFixup) {
+    addPass(createX86FixupZExt());
+  }
+  return false;
+}
+
 void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }

 void X86PassConfig::addPreEmitPass() {