Index: llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp +++ llvm/trunk/lib/Target/X86/X86FixupBWInsts.cpp @@ -49,7 +49,7 @@ #include "X86InstrInfo.h" #include "X86Subtarget.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -81,7 +81,7 @@ /// \brief Loop over all of the instructions in the basic block /// replacing applicable byte or word instructions with better /// alternatives. - void processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB) const; + void processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); /// \brief This sets the \p SuperDestReg to the 32 bit super reg /// of the original destination register of the MachineInstr @@ -128,6 +128,9 @@ /// Machine loop info used for guiding some heruistics. MachineLoopInfo *MLI; + + /// Register Liveness information after the current instruction. + LivePhysRegs LiveRegs; }; char FixupBWInstPass::ID = 0; } @@ -142,6 +145,7 @@ TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); OptForSize = MF.getFunction()->optForSize(); MLI = &getAnalysis<MachineLoopInfo>(); + LiveRegs.init(&TII->getRegisterInfo()); DEBUG(dbgs() << "Start X86FixupBWInsts\n";); @@ -181,11 +185,7 @@ if (getX86SubSuperRegister(SuperDestReg, OrigDestSize) != OrigDestReg) return false; - MachineBasicBlock::LivenessQueryResult LQR = - OrigMI->getParent()->computeRegisterLiveness(&TII->getRegisterInfo(), - SuperDestReg, OrigMI); - - if (LQR != MachineBasicBlock::LQR_Dead) + if (LiveRegs.contains(SuperDestReg)) return false; if (OrigDestSize == 8) { @@ -194,9 +194,7 @@ // whether the super-register is dead. 
unsigned UpperByteReg = getX86SubSuperRegister(SuperDestReg, 8, true); - LQR = OrigMI->getParent()->computeRegisterLiveness(&TII->getRegisterInfo(), - UpperByteReg, OrigMI); - if (LQR != MachineBasicBlock::LQR_Dead) + if (LiveRegs.contains(UpperByteReg)) return false; } @@ -229,7 +227,7 @@ } void FixupBWInstPass::processBasicBlock(MachineFunction &MF, - MachineBasicBlock &MBB) const { + MachineBasicBlock &MBB) { // This algorithm doesn't delete the instructions it is replacing // right away. By leaving the existing instructions in place, the @@ -243,9 +241,14 @@ // from making it seem as if the larger register might be live. SmallVector<std::pair<MachineInstr *, MachineInstr *>, 8> MIReplacements; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) { + // Start computing liveness for this block. We iterate from the end to be able + // to update this for each instruction. + LiveRegs.clear(); + LiveRegs.addLiveOuts(&MBB); + + for (auto I = MBB.rbegin(); I != MBB.rend(); ++I) { MachineInstr *NewMI = nullptr; - MachineInstr *MI = I; + MachineInstr *MI = &*I; // See if this is an instruction of the type we are currently looking for. switch (MI->getOpcode()) { @@ -275,6 +278,9 @@ if (NewMI) MIReplacements.push_back(std::make_pair(MI, NewMI)); + + // We're done with this instruction, update liveness for the next one. 
+ LiveRegs.stepBackward(*MI); } while (!MIReplacements.empty()) { Index: llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll +++ llvm/trunk/test/CodeGen/X86/MergeConsecutiveStores.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -fixup-byte-word-insts=0 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s -; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx -addr-sink-using-gep=1 < %s | FileCheck -check-prefix=CHECK -check-prefix=BWON %s %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 } %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 } @@ -185,7 +185,8 @@ ; BWON: movzbl ; BWOFF: movb ; CHECK: movb -; CHECK: movb +; BWON: movzbl +; BWOFF: movb ; CHECK: movb ; CHECK: ret define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp { @@ -340,8 +341,9 @@ ; Make sure that we merge the consecutive load/store sequence below and use a ; word (16 bit) instead of a byte copy. ; CHECK-LABEL: MergeLoadStoreBaseIndexOffset: -; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] -; CHECK: movw [[REG]], (%{{.*}}) +; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]] +; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]] +; CHECK: movw %[[REG]], (%{{.*}}) define void @MergeLoadStoreBaseIndexOffset(i64* %a, i8* %b, i8* %c, i32 %n) { br label %1 @@ -372,8 +374,9 @@ ; word (16 bit) instead of a byte copy even if there are intermediate sign ; extensions. 
; CHECK-LABEL: MergeLoadStoreBaseIndexOffsetSext: -; CHECK: movw (%{{.*}},%{{.*}}), [[REG:%[a-z]+]] -; CHECK: movw [[REG]], (%{{.*}}) +; BWON: movzwl (%{{.*}},%{{.*}}), %e[[REG:[a-z]+]] +; BWOFF: movw (%{{.*}},%{{.*}}), %[[REG:[a-z]+]] +; CHECK: movw %[[REG]], (%{{.*}}) define void @MergeLoadStoreBaseIndexOffsetSext(i8* %a, i8* %b, i8* %c, i32 %n) { br label %1 Index: llvm/trunk/test/CodeGen/X86/fixup-bw-inst.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fixup-bw-inst.ll +++ llvm/trunk/test/CodeGen/X86/fixup-bw-inst.ll @@ -11,8 +11,6 @@ ; This has byte loads interspersed with byte stores, in a single ; basic-block loop. The upper portion should be dead, so the movb loads ; should have been changed into movzbl instead. -; TODO: The second movb load doesn't get fixed due to register liveness -; not being accurate enough. ; CHECK-LABEL: foo1 ; load: ; BWON: movzbl @@ -20,7 +18,8 @@ ; store: ; CHECK: movb ; load: -; CHECK: movb +; BWON: movzbl +; BWOFF: movb ; store: ; CHECK: movb ; CHECK: ret @@ -59,8 +58,6 @@ ; This has word loads interspersed with word stores. ; The upper portion should be dead, so the movw loads should have ; been changed into movzwl instead. -; TODO: The second movw load doesn't get fixed due to register liveness -; not being accurate enough. 
; CHECK-LABEL: foo2 ; load: ; BWON: movzwl @@ -68,7 +65,8 @@ ; store: ; CHECK: movw ; load: -; CHECK: movw +; BWON: movzwl +; BWOFF: movw ; store: ; CHECK: movw ; CHECK: ret Index: llvm/trunk/test/CodeGen/X86/return-ext.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/return-ext.ll +++ llvm/trunk/test/CodeGen/X86/return-ext.ll @@ -6,7 +6,10 @@ ; RUN: FileCheck -check-prefix=CHECK -check-prefix=BWOFF %s ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -fixup-byte-word-insts=1 | \ ; RUN: FileCheck -check-prefix=CHECK -check-prefix=BWON %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=DARWIN %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -fixup-byte-word-insts=0 | \ +; RUN: FileCheck -check-prefix=DARWIN -check-prefix=DARWIN-BWOFF %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -fixup-byte-word-insts=1 | \ +; RUN: FileCheck -check-prefix=DARWIN -check-prefix=DARWIN-BWON %s @x = common global i32 0, align 4 @@ -84,7 +87,8 @@ ; Except on Darwin, for legay reasons. ; DARWIN-LABEL: unsigned_i16: -; DARWIN: movw +; DARWIN-BWOFF: movw +; DARWIN-BWON: movzwl ; DARWIN-NEXT: addw ; DARWIN-NEXT: movzwl ; DARWIN-NEXT: ret