Index: lib/CodeGen/MachineCSE.cpp =================================================================== --- lib/CodeGen/MachineCSE.cpp +++ lib/CodeGen/MachineCSE.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" #include "llvm/Target/TargetInstrInfo.h" @@ -37,6 +38,11 @@ "Number of cross-MBB physreg referencing CS eliminated"); STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); +//FIXME remove this after fix CodeGen/X86/inline-asm-fpstack.ll +static cl::opt +CSEIgnoreCopy("cse-ignore-copy", cl::init(false), cl::Hidden, + cl::desc("ignore copy")); + namespace { class MachineCSE : public MachineFunctionPass { const TargetInstrInfo *TII; @@ -114,53 +120,41 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB) { - bool Changed = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (!MRI->hasOneNonDBGUse(Reg)) - // Only coalesce single use copies. This ensure the copy will be - // deleted. - continue; - MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (!DefMI->isCopy()) - continue; - unsigned SrcReg = DefMI->getOperand(1).getReg(); - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) - continue; - if (DefMI->getOperand(0).getSubReg()) - continue; - // FIXME: We should trivially coalesce subregister copies to expose CSE - // opportunities on instructions with truncated operands (see - // cse-add-with-overflow.ll). This can be done here as follows: - // if (SrcSubReg) - // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, - // SrcSubReg); - // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); - // - // The 2-addr pass has been updated to handle coalesced subregs. However, - // some machine-specific code still can't handle it. - // To handle it properly we also need a way find a constrained subregister - // class given a super-reg class and subreg index. - if (DefMI->getOperand(1).getSubReg()) - continue; - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - if (!MRI->constrainRegClass(SrcReg, RC)) - continue; - DEBUG(dbgs() << "Coalescing: " << *DefMI); - DEBUG(dbgs() << "*** to: " << *MI); - MO.setReg(SrcReg); - MRI->clearKillFlags(SrcReg); - DefMI->eraseFromParent(); - ++NumCoalesces; - Changed = true; + if (!MI->isCopy()) + return false; + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + if (!MRI->hasOneNonDBGUse(DstReg)) + // Only coalesce single use copies. This ensure the copy will be + // deleted. + return false; + MachineInstr *UseMI = nullptr; + MachineOperand *UseMO = nullptr; + bool OnlyOneUse = true; + for (MachineOperand &MO : MRI->use_nodbg_operands(DstReg)) { + assert(OnlyOneUse && "only one use should be..."); + OnlyOneUse = false; + UseMO = &MO; + UseMI = UseMO->getParent(); + if (!isCSECandidate(UseMI)) + return false; } - - return Changed; + if (MI->getOperand(0).getSubReg() ||MI->getOperand(1).getSubReg()) + return false; + const TargetRegisterClass *RC = MRI->getRegClass(DstReg); + if (!MRI->constrainRegClass(SrcReg, RC)) + return false; + DEBUG(dbgs() << "Coalescing: " << *MI); + DEBUG(dbgs() << "*** to: " << *UseMI); + UseMO->setReg(SrcReg); + MRI->clearKillFlags(SrcReg); + MI->eraseFromParent(); + ++NumCoalesces; + return true; } bool @@ -330,8 +324,8 @@ MI->isInlineAsm() || MI->isDebugValue()) return false; - // Ignore copies. - if (MI->isCopyLike()) + // Ignore copies. + if (CSEIgnoreCopy && MI->isCopyLike()) return false; // Ignore stuff that we obviously can't move. @@ -448,22 +442,15 @@ MachineInstr *MI = &*I; ++I; + if (PerformTrivialCoalescing(MI, MBB)) { + Changed = true; + continue; + } + if (!isCSECandidate(MI)) continue; bool FoundCSE = VNT.count(MI); - if (!FoundCSE) { - // Look for trivial copy coalescing opportunities. - if (PerformTrivialCoalescing(MI, MBB)) { - Changed = true; - - // After coalescing MI itself may become a copy. - if (MI->isCopyLike()) - continue; - FoundCSE = VNT.count(MI); - } - } - // Commute commutable instructions. bool Commuted = false; if (!FoundCSE && MI->isCommutable()) { Index: test/CodeGen/ARM/atomic-64bit.ll =================================================================== --- test/CodeGen/ARM/atomic-64bit.ll +++ test/CodeGen/ARM/atomic-64bit.ll @@ -189,8 +189,8 @@ ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]] ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG1]], r2 ; CHECK-THUMB-LE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG2]], r3 -; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]] -; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]] +; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_HI:[a-z0-9]+]], [[REG1]], r2 +; CHECK-THUMB-BE-DAG: eor.w [[MISMATCH_LO:[a-z0-9]+]], [[REG2]], r3 ; CHECK-THUMB: orrs [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-THUMB: bne ; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}} Index: test/CodeGen/ARM/debug-info-branch-folding.ll =================================================================== --- test/CodeGen/ARM/debug-info-branch-folding.ll +++ test/CodeGen/ARM/debug-info-branch-folding.ll @@ -2,11 +2,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "thumbv7-apple-macosx10.6.7" -;CHECK: vadd.f32 q4, q8, q8 +;CHECK: vadd.f32 q[[REGNO:[0-9]+]], q8, q8 ;CHECK-NEXT: LBB0_1 -;CHECK:@DEBUG_VALUE: x <- Q4{{$}} -;CHECK-NEXT:@DEBUG_VALUE: y <- Q4{{$}} +;CHECK:@DEBUG_VALUE: x <- Q[[REGNO]]{{$}} +;CHECK-NEXT:@DEBUG_VALUE: y <- Q[[REGNO]]{{$}} @.str = external constant [13 x i8] Index: test/CodeGen/X86/cse-add-with-overflow.ll =================================================================== --- test/CodeGen/X86/cse-add-with-overflow.ll +++ test/CodeGen/X86/cse-add-with-overflow.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=x86_64-darwin -mcpu=generic | FileCheck %s -; XFAIL: * ; rdar:15661073 simple example of redundant adds ; ; MachineCSE should coalesce trivial subregister copies. @@ -6,7 +5,6 @@ ; ; The extra movl+addl should be removed during MachineCSE. ; CHECK-LABEL: redundantadd -; CHECK: cmpq ; CHECK: movq ; CHECK-NOT: movl ; CHECK: addl @@ -17,13 +15,6 @@ entry: %tmp8 = load i64* %a0, align 8 %tmp12 = load i64* %a1, align 8 - %tmp13 = icmp ult i64 %tmp12, -281474976710656 - br i1 %tmp13, label %exit1, label %body - -exit1: - unreachable - -body: %tmp14 = trunc i64 %tmp8 to i32 %tmp15 = trunc i64 %tmp12 to i32 %tmp16 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %tmp14, i32 %tmp15) @@ -36,8 +27,7 @@ return: %tmp18 = add i64 %tmp12, %tmp8 %tmp19 = and i64 %tmp18, 4294967295 - %tmp20 = or i64 %tmp19, -281474976710656 - ret i64 %tmp20 + ret i64 %tmp19 } declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) Index: test/CodeGen/X86/inline-asm-fpstack.ll =================================================================== --- test/CodeGen/X86/inline-asm-fpstack.ll +++ test/CodeGen/X86/inline-asm-fpstack.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -no-integrated-as | FileCheck %s +; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -no-integrated-as -cse-ignore-copy | FileCheck %s ; There should be no stack manipulations between the inline asm and ret. ; CHECK: test1