diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3977,8 +3977,10 @@ /// Check whether the definition can be converted /// to remove a comparison against zero. -inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) { +inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag, + bool &ClearsOverflowFlag) { NoSignFlag = false; + ClearsOverflowFlag = false; switch (MI.getOpcode()) { default: return false; @@ -4013,6 +4015,21 @@ case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm: case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm: case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r: + case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri: + case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8: + case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr: + case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm: + case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm: + case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri: + case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8: + case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr: + case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm: + case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm: + case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: + case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1: + case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1: + case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1: + return true; case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri: case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8: case X86::AND8ri: case X86::AND64rr: case X86::AND32rr: @@ -4028,20 +4045,6 @@ case X86::OR8ri: case X86::OR64rr: case X86::OR32rr: case X86::OR16rr: case X86::OR8rr: case X86::OR64rm: case X86::OR32rm: case X86::OR16rm: case X86::OR8rm: - case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri: - case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8: - case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr: - case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm: - case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm: - case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri: - case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8: - case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr: - case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm: - case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm: - case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: - case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1: - case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1: - case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1: case X86::ANDN32rr: case X86::ANDN32rm: case X86::ANDN64rr: case X86::ANDN64rm: case X86::BLSI32rr: case X86::BLSI32rm: @@ -4079,6 +4082,7 @@ case X86::T1MSKC64rr: case X86::T1MSKC64rm: case X86::TZMSK32rr: case X86::TZMSK32rm: case X86::TZMSK64rr: case X86::TZMSK64rm: + ClearsOverflowFlag = true; return true; case X86::BEXTR32rr: case X86::BEXTR64rr: case X86::BEXTR32rm: case X86::BEXTR64rm: @@ -4204,8 +4208,9 @@ // right way. bool ShouldUpdateCC = false; bool NoSignFlag = false; + bool ClearsOverflowFlag = false; X86::CondCode NewCC = X86::COND_INVALID; - if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) { + if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) { // Scan forward from the use until we hit the use we're looking for or the // compare instruction. for (MachineBasicBlock::iterator J = MI;; ++J) { @@ -4317,11 +4322,15 @@ default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: + // CF is used, we can't perform this optimization. + return false; case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: case X86::COND_O: case X86::COND_NO: - // CF and OF are used, we can't perform this optimization. - return false; + // If OF is used, the instruction needs to clear it like CmpZero does. + if (!ClearsOverflowFlag) + return false; + break; case X86::COND_S: case X86::COND_NS: // If SF is used, but the instruction doesn't update the SF, then we // can't do the optimization.