Index: lib/Target/X86/X86FixupBWInsts.cpp =================================================================== --- lib/Target/X86/X86FixupBWInsts.cpp +++ lib/Target/X86/X86FixupBWInsts.cpp @@ -95,12 +95,6 @@ /// nullptr. MachineInstr *tryReplaceCopy(MachineInstr *MI) const; - // Change the MachineInstr \p MI into an eqivalent 32 bit instruction if - // possible. Return the replacement instruction if OK, return nullptr - // otherwise. Set WasCandidate to true or false depending on whether the - // MI was a candidate for this sort of transformation. - MachineInstr *tryReplaceInstr(MachineInstr *MI, MachineBasicBlock &MBB, - bool &WasCandidate) const; public: static char ID; @@ -273,54 +267,6 @@ return MIB; } -MachineInstr *FixupBWInstPass::tryReplaceInstr( - MachineInstr *MI, MachineBasicBlock &MBB, - bool &WasCandidate) const { - MachineInstr *NewMI = nullptr; - WasCandidate = false; - - // See if this is an instruction of the type we are currently looking for. - switch (MI->getOpcode()) { - - case X86::MOV8rm: - // Only replace 8 bit loads with the zero extending versions if - // in an inner most loop and not optimizing for size. This takes - // an extra byte to encode, and provides limited performance upside. - if (MachineLoop *ML = MLI->getLoopFor(&MBB)) { - if (ML->begin() == ML->end() && !OptForSize) { - NewMI = tryReplaceLoad(X86::MOVZX32rm8, MI); - WasCandidate = true; - } - } - break; - - case X86::MOV16rm: - // Always try to replace 16 bit load with 32 bit zero extending. - // Code size is the same, and there is sometimes a perf advantage - // from eliminating a false dependence on the upper portion of - // the register. - NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI); - WasCandidate = true; - break; - - case X86::MOV8rr: - case X86::MOV16rr: - // Always try to replace 8/16 bit copies with a 32 bit copy. 
- // Code size is either less (16) or equal (8), and there is sometimes a - // perf advantage from eliminating a false dependence on the upper portion - // of the register. - NewMI = tryReplaceCopy(MI); - WasCandidate = true; - break; - - default: - // nothing to do here. - break; - } - - return NewMI; -} - void FixupBWInstPass::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB) { @@ -342,65 +288,57 @@ // We run after PEI, so we need to AddPristinesAndCSRs. LiveRegs.addLiveOuts(MBB); - bool CandidateDidntGetTransformed = false; - bool WasCandidate = false; - for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) { + MachineInstr *NewMI = nullptr; MachineInstr *MI = &*I; - - MachineInstr *NewMI = tryReplaceInstr(MI, MBB, WasCandidate); - // Add this to replacements if it was a candidate, even if NewMI is - // nullptr. We will revisit that in a bit. - if (WasCandidate) { - MIReplacements.push_back(std::make_pair(MI, NewMI)); - if (!NewMI) - CandidateDidntGetTransformed = true; + // See if this is an instruction of the type we are currently looking for. + switch (MI->getOpcode()) { + + case X86::MOV8rm: + // Only replace 8 bit loads with the zero extending versions if + // in an inner most loop and not optimizing for size. This takes + // an extra byte to encode, and provides limited performance upside. + if (MachineLoop *ML = MLI->getLoopFor(&MBB)) { + if (ML->begin() == ML->end() && !OptForSize) + NewMI = tryReplaceLoad(X86::MOVZX32rm8, MI); + } + break; + + case X86::MOV16rm: + // Always try to replace 16 bit load with 32 bit zero extending. + // Code size is the same, and there is sometimes a perf advantage + // from eliminating a false dependence on the upper portion of + // the register. + NewMI = tryReplaceLoad(X86::MOVZX32rm16, MI); + break; + + case X86::MOV8rr: + case X86::MOV16rr: + // Always try to replace 8/16 bit copies with a 32 bit copy. 
+ // Code size is either less (16) or equal (8), and there is sometimes a + // perf advantage from eliminating a false dependence on the upper portion + // of the register. + NewMI = tryReplaceCopy(MI); + break; + + default: + // nothing to do here. + break; } + if (NewMI) + MIReplacements.push_back(std::make_pair(MI, NewMI)); + // We're done with this instruction, update liveness for the next one. LiveRegs.stepBackward(*MI); } - if (CandidateDidntGetTransformed) { - // If there was a candidate that didn't get transformed then let's try - // doing the register liveness going forward. Sometimes one direction - // is overly conservative compared to the other. - // FIXME - Register liveness should be investigated further. This really - // shouldn't be necessary. See PR28142. - LiveRegs.clear(); - LiveRegs.addLiveIns(MBB); - - auto NextCandidateIter = MIReplacements.begin(); - auto EndCandidateIter = MIReplacements.end(); - - for (auto I = MBB.begin(); I != MBB.end(); ++I) { - MachineInstr *MI = &*I; - SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; - LiveRegs.stepForward(*MI, Clobbers); - - if (NextCandidateIter == EndCandidateIter) - break; - - // Only check and create a new instruction if this instruction is - // known to be a candidate that didn't get transformed. 
- if (NextCandidateIter->first == MI) { - if (NextCandidateIter->second == nullptr) { - MachineInstr *NewMI = tryReplaceInstr(MI, MBB, WasCandidate); - NextCandidateIter->second = NewMI; - } - ++NextCandidateIter; - } - } - } - while (!MIReplacements.empty()) { MachineInstr *MI = MIReplacements.back().first; MachineInstr *NewMI = MIReplacements.back().second; MIReplacements.pop_back(); - if (NewMI) { - MBB.insert(MI, NewMI); - MBB.erase(MI); - } + MBB.insert(MI, NewMI); + MBB.erase(MI); } } Index: test/CodeGen/X86/fixup-bw-inst-fwlive.mir =================================================================== --- test/CodeGen/X86/fixup-bw-inst-fwlive.mir +++ /dev/null @@ -1,37 +0,0 @@ -# RUN: llc -run-pass x86-fixup-bw-insts -mtriple=x86_64-- -o /dev/null %s 2>&1 | FileCheck %s - -# Verify that the forwards live-ness checking code in fixup-bw-inst works. - ---- | - target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" - - define i8 @foo(i8 %p1) { - entry: - %t1 = or i8 %p1, 0 - br label %false - false: - ret i8 %t1 - } - -... - ---- -name: foo -allVRegsAllocated: true -isSSA: false -tracksRegLiveness: true -liveins: - - { reg: '%edi' } -body: | - bb.0.entry: - liveins: %edi - successors: %bb.1.false - - %al = MOV8rr %dil, implicit %edi - ; CHECK: %eax = MOV32rr undef %edi, implicit %dil - - bb.1.false: - liveins: %al, %ax, %eax, %rax - RETQ %al - -...