diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -626,6 +626,21 @@ unsigned &SrcOpIdx1, unsigned &SrcOpIdx2, bool IsIntrinsic = false) const; + + /// Returns true when instruction \p FlagI produces the same flags as \p OI. + /// The caller should pass in the results of calling analyzeCompare on \p OI: + /// \p SrcReg, \p SrcReg2, \p ImmMask, \p ImmValue. + /// If the flags match \p OI as if it had the input operands swapped then the + /// function succeeds and sets \p IsSwapped to true. + /// + /// Examples of OI, FlagI pairs returning true: + /// CMP %1, 42 and CMP %1, 42 + /// CMP %1, %2 and %3 = SUB %1, %2 + /// TEST %1, %1 and %2 = SUB %1, 0 + /// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true + bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg, + Register SrcReg2, int64_t ImmMask, int64_t ImmValue, + const MachineInstr &OI, bool *IsSwapped) const; }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4011,42 +4011,72 @@ return false; } -/// Check whether the first instruction, whose only -/// purpose is to update flags, can be made redundant. -/// CMPrr can be made redundant by SUBrr if the operands are the same. -/// This function can be extended later on. -/// SrcReg, SrcRegs: register operands for FlagI. -/// ImmValue: immediate for FlagI if it takes an immediate. -inline static bool isRedundantFlagInstr(const MachineInstr &FlagI, +bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg, Register SrcReg2, int64_t ImmMask, int64_t ImmValue, - const MachineInstr &OI) { - if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) || - (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) || - (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) || - (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) && - ((OI.getOperand(1).getReg() == SrcReg && - OI.getOperand(2).getReg() == SrcReg2) || - (OI.getOperand(1).getReg() == SrcReg2 && - OI.getOperand(2).getReg() == SrcReg))) - return true; - - if (ImmMask != 0 && - ((FlagI.getOpcode() == X86::CMP64ri32 && - OI.getOpcode() == X86::SUB64ri32) || - (FlagI.getOpcode() == X86::CMP64ri8 && - OI.getOpcode() == X86::SUB64ri8) || - (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) || - (FlagI.getOpcode() == X86::CMP32ri8 && - OI.getOpcode() == X86::SUB32ri8) || - (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) || - (FlagI.getOpcode() == X86::CMP16ri8 && - OI.getOpcode() == X86::SUB16ri8) || - (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) && - OI.getOperand(1).getReg() == SrcReg && - OI.getOperand(2).getImm() == ImmValue) - return true; - return false; + const MachineInstr &OI, + bool *IsSwapped) const { + switch (OI.getOpcode()) { + case X86::CMP64rr: + case X86::CMP32rr: + case X86::CMP16rr: + case X86::CMP8rr: + case X86::SUB64rr: + case X86::SUB32rr: + case X86::SUB16rr: + case X86::SUB8rr: { + Register OISrcReg; + Register OISrcReg2; + int64_t OIMask; + int64_t OIValue; + if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) || + OIMask != ImmMask || OIValue != ImmValue) + return false; + if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) { + *IsSwapped = false; + return true; + } + if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) { + *IsSwapped = true; + return true; + } + return false; + } + case X86::CMP64ri32: + case X86::CMP64ri8: + case X86::CMP32ri: + case X86::CMP32ri8: + case X86::CMP16ri: + case X86::CMP16ri8: + case X86::CMP8ri: + case X86::SUB64ri32: + case X86::SUB64ri8: + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB16ri: + case X86::SUB16ri8: + case X86::SUB8ri: + case X86::TEST64rr: + case X86::TEST32rr: + case X86::TEST16rr: + case X86::TEST8rr: { + if (ImmMask != 0) { + Register OISrcReg; + Register OISrcReg2; + int64_t OIMask; + int64_t OIValue; + if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) && + SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) { + assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister && + "should not have 2nd register"); + return true; + } + } + return FlagI.isIdenticalTo(OI); + } + default: + return false; + } } /// Check whether the definition can be converted @@ -4275,12 +4305,19 @@ bool IsCmpZero = (CmpMask != 0 && CmpValue == 0); + // Transformation currently requires SSA values. + if (SrcReg2.isPhysical()) + return false; + MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg); + assert(SrcRegDef && "Must have a definition (SSA)"); + MachineInstr *MI = nullptr; MachineInstr *Sub = nullptr; MachineInstr *Movr0Inst = nullptr; bool NoSignFlag = false; bool ClearsOverflowFlag = false; bool ShouldUpdateCC = false; + bool IsSwapped = false; X86::CondCode NewCC = X86::COND_INVALID; // Search backward from CmpInstr for the next instruction defining EFLAGS. @@ -4288,8 +4325,6 @@ MachineBasicBlock &CmpMBB = *CmpInstr.getParent(); MachineBasicBlock::reverse_iterator From = std::next(MachineBasicBlock::reverse_iterator(CmpInstr)); - MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg); - assert(SrcRegDef && "Must have a definition (SSA)"); for (MachineBasicBlock *MBB = &CmpMBB;;) { for (MachineInstr &Inst : make_range(From, MBB->rend())) { // Try to use EFLAGS from the instruction defining %SrcReg. Example: @@ -4329,8 +4364,8 @@ // sub x, y or cmp x, y // ... // EFLAGS not changed // cmp x, y // <-- can be removed - if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, - CmpMask, CmpValue, Inst)) { + if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue, + Inst, &IsSwapped)) { Sub = &Inst; break; } @@ -4360,10 +4395,6 @@ From = MBB->rbegin(); } - bool IsSwapped = - (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg); - // Scan forward from the instruction after CmpInstr for uses of EFLAGS. // It is safe to remove CmpInstr if EFLAGS is redefined or killed. // If we are done with the basic block, we need to check whether EFLAGS is @@ -4386,7 +4417,7 @@ // EFLAGS is used by this instruction. X86::CondCode OldCC = X86::COND_INVALID; - if (IsCmpZero || IsSwapped) { + if (MI || IsSwapped) { // We decode the condition code from opcode. if (Instr.isBranch()) OldCC = X86::getCondFromBranch(Instr); @@ -4398,7 +4429,7 @@ if (OldCC == X86::COND_INVALID) return false; } X86::CondCode ReplacementCC = X86::COND_INVALID; - if (IsCmpZero) { + if (MI) { switch (OldCC) { default: break; case X86::COND_A: case X86::COND_AE: @@ -4456,14 +4487,15 @@ // If EFLAGS is not killed nor re-defined, we should check whether it is // live-out. If it is live-out, do not optimize. - if ((IsCmpZero || IsSwapped) && !IsSafe) { + if ((MI || IsSwapped) && !IsSafe) { for (MachineBasicBlock *Successor : CmpMBB.successors()) if (Successor->isLiveIn(X86::EFLAGS)) return false; } // The instruction to be updated is either Sub or MI. - Sub = IsCmpZero ? MI : Sub; + assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set"); + Sub = MI != nullptr ? MI : Sub; MachineBasicBlock *SubBB = Sub->getParent(); // Move Movr0Inst to the appropriate place before Sub. if (Movr0Inst) { diff --git a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll --- a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll +++ b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll @@ -67,7 +67,6 @@ ; CHECK-NEXT: LBB0_6: ## %NodeBlock ; CHECK-NEXT: js LBB0_9 ; CHECK-NEXT: ## %bb.7: ## %LeafBlock1 -; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: jne LBB0_3 ; CHECK-NEXT: ## %bb.8: ## %bb12.i.i935.exitStub ; CHECK-NEXT: movl %edi, (%esi) diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir --- a/llvm/test/CodeGen/X86/optimize-compare.mir +++ b/llvm/test/CodeGen/X86/optimize-compare.mir @@ -212,8 +212,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi ; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags - ; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags - ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags %0:gr32 = COPY $esi %1:gr32 = COPY $edi CMP32rr %0, %1, implicit-def $eflags @@ -231,7 +230,6 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi ; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags - ; CHECK-NEXT: CMP64ri8 [[COPY]], 15, implicit-def $eflags ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr64 = COPY $rsi %1:gr64 = COPY $rdi @@ -249,7 +247,6 @@ ; CHECK: [[COPY:%[0-9]+]]:gr16 = COPY $ax ; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags - ; CHECK-NEXT: TEST16rr [[COPY]], [[COPY]], implicit-def $eflags ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr16 = COPY $ax TEST16rr %0, %0, implicit-def $eflags @@ -267,8 +264,7 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edi ; CHECK-NEXT: CMP32rr [[COPY]], [[COPY1]], implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags - ; CHECK-NEXT: CMP32rr [[COPY1]], [[COPY]], implicit-def $eflags - ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags %0:gr32 = COPY $esi %1:gr32 = COPY $edi CMP32rr %0, %1, implicit-def $eflags @@ -285,7 +281,6 @@ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi ; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags - ; CHECK-NEXT: CMP32ri [[COPY]], -12345, implicit-def $eflags ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr32 = COPY $esi CMP32ri %0, -12345, implicit-def $eflags @@ -323,7 +318,6 @@ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi ; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags - ; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr32 = COPY $esi CMP32ri8 %0, 0, implicit-def $eflags @@ -340,7 +334,6 @@ ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $esi ; CHECK-NEXT: TEST32rr [[COPY]], [[COPY]], implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 2, implicit $eflags - ; CHECK-NEXT: CMP32ri8 [[COPY]], 0, implicit-def $eflags ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags %0:gr32 = COPY $esi TEST32rr %0, %0, implicit-def $eflags @@ -359,7 +352,6 @@ ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi ; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 7, implicit $eflags - ; CHECK-NEXT: CMP64ri32 [[COPY]], @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags ; CHECK-NEXT: $cl = SETCCr 3, implicit $eflags %0:gr64 = COPY $rsi CMP64ri32 %0, @opt_redundant_flags_cmp_addr + 4, implicit-def $eflags diff --git a/llvm/test/CodeGen/X86/postalloc-coalescing.ll b/llvm/test/CodeGen/X86/postalloc-coalescing.ll --- a/llvm/test/CodeGen/X86/postalloc-coalescing.ll +++ b/llvm/test/CodeGen/X86/postalloc-coalescing.ll @@ -15,7 +15,6 @@ ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: .LBB0_3: # %bb158 ; CHECK-NEXT: movb %al, 0 -; CHECK-NEXT: cmpl $-1, %eax ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retl entry: