diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -638,7 +638,8 @@ /// ImmValue: immediate for FlagI if it takes an immediate. bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg, Register SrcReg2, int64_t ImmMask, int64_t ImmValue, - const MachineInstr &OI, bool *IsSwapped) const; + const MachineInstr &OI, bool *IsSwapped, + int64_t *ImmDelta) const; }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4019,8 +4019,8 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg, Register SrcReg2, int64_t ImmMask, int64_t ImmValue, - const MachineInstr &OI, - bool *IsSwapped) const { + const MachineInstr &OI, bool *IsSwapped, + int64_t *ImmDelta) const { switch (OI.getOpcode()) { case X86::CMP64rr: case X86::CMP32rr: @@ -4071,10 +4071,21 @@ int64_t OIMask; int64_t OIValue; if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) && - SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) { - assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister && - "should not have 2nd register"); - return true; + SrcReg == OISrcReg && ImmMask == OIMask) { + if (OIValue == ImmValue) { + *ImmDelta = 0; + return true; + } else if (static_cast(ImmValue) == + static_cast(OIValue) - 1) { + *ImmDelta = -1; + return true; + } else if (static_cast(ImmValue) == + static_cast(OIValue) + 1) { + *ImmDelta = 1; + return true; + } else { + return false; + } } } return FlagI.isIdenticalTo(OI); @@ -4318,6 +4329,7 @@ bool ShouldUpdateCC = false; bool IsSwapped = false; X86::CondCode NewCC = X86::COND_INVALID; + int64_t ImmDelta = 0; // Search backward from CmpInstr for the next instruction defining EFLAGS. 
const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -4368,7 +4380,7 @@ // ... // EFLAGS not changed // cmp x, y // <-- can be removed if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue, - Inst, &IsSwapped)) { + Inst, &IsSwapped, &ImmDelta)) { Sub = &Inst; break; } @@ -4420,7 +4432,7 @@ // EFLAGS is used by this instruction. X86::CondCode OldCC = X86::COND_INVALID; - if (MI || IsSwapped) { + if (MI || IsSwapped || ImmDelta != 0) { // We decode the condition code from opcode. if (Instr.isBranch()) OldCC = X86::getCondFromBranch(Instr); @@ -4473,9 +4485,59 @@ // We swap the condition code and synthesize the new opcode. ReplacementCC = getSwappedCondition(OldCC); if (ReplacementCC == X86::COND_INVALID) return false; + ShouldUpdateCC = true; + } else if (ImmDelta != 0) { + unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg)); + // Shift amount for min/max constants to adjust for 8/16/32 instruction + // sizes. + switch (OldCC) { + case X86::COND_L: // x x <=s C + if (ImmDelta != 1 || APInt(BitWidth, INT64_MIN) == CmpValue) + return false; + ReplacementCC = X86::COND_LE; + break; + case X86::COND_B: // x x <=u C + if (ImmDelta != 1 || CmpValue == 0) + return false; + ReplacementCC = X86::COND_BE; + break; + case X86::COND_GE: // x >=s (C + 1) --> x >s C + if (ImmDelta != 1 || APInt(BitWidth, INT64_MIN) == CmpValue) + return false; + ReplacementCC = X86::COND_G; + break; + case X86::COND_AE: // x >=u (C + 1) --> x >u C + if (ImmDelta != 1 || CmpValue == 0) + return false; + ReplacementCC = X86::COND_A; + break; + case X86::COND_G: // x >s (C - 1) --> x >=s C + if (ImmDelta != -1 || APInt(BitWidth, INT64_MAX) == CmpValue) + return false; + ReplacementCC = X86::COND_GE; + break; + case X86::COND_A: // x >u (C - 1) --> x >=u C + if (ImmDelta != -1 || APInt(BitWidth, UINT64_MAX) == CmpValue) + return false; + ReplacementCC = X86::COND_AE; + break; + case X86::COND_LE: // x <=s (C - 1) --> x x = 2; CMP can be removed. 
+ CMP64ri8 %0, 2, implicit-def $eflags + ; %0 >=s 2 --> %0 >s 1 + $bl = SETCCr 13, implicit $eflags + ; %0 >=u 2 --> %0 >u 1 + $bl = SETCCr 3, implicit $eflags + ; %0 %0 <=s 1 + $bl = SETCCr 12, implicit $eflags + ; %0 %0 <=u 1 + $bl = SETCCr 2, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_1 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_1 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri8 [[COPY]], 42, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 5, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr64 = COPY $rsi + ; CMP+SETCC %0 != 42 + CMP64ri8 %0, 42, implicit-def $eflags + $cl = SETCCr 5, implicit $eflags + ; CMP+SETCC %0 >= 2; CMP can be removed. + CMP64ri8 %0, 41, implicit-def $eflags + ; %0 >s 41 --> %0 >=s 42 + $bl = SETCCr 15, implicit $eflags + ; %0 >u 41 --> %0 >=u 42 + $bl = SETCCr 7, implicit $eflags + ; %0 <=s 41 --> %0 %0 =u 1) + CMP8ri %0, 1, implicit-def $eflags + $cl = SETCCr 3, implicit $eflags + + ; CMP should be removed (%0 <=s -1) + CMP8ri %0, -1, implicit-def $eflags + $cl = SETCCr 14, implicit $eflags +... 
+--- +name: opt_redundant_flags_adjusted_imm_cmp_test +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_cmp_test + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri32 [[COPY]], 1, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $edi + ; CHECK-NEXT: CMP64ri32 [[COPY1]], -1, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 14, implicit $eflags + %0:gr64 = COPY $rsi + CMP64ri32 %0, 1, implicit-def $eflags + ; TEST should be removed + TEST64rr %0, %0, implicit-def $eflags + $cl = SETCCr 15, implicit $eflags + + %1:gr64 = COPY $edi + CMP64ri32 %1, -1, implicit-def $eflags + ; TEST should be removed + TEST64rr %1, %1, implicit-def $eflags + $cl = SETCCr 12, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_noopt_0 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_0 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri8 [[COPY]], 42, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: CMP64ri8 [[COPY]], 41, implicit-def $eflags + ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags + %0:gr64 = COPY $rsi + ; CMP+SETCC %0 s INT16_MAX + $bl = SETCCr 15, implicit $eflags + + CMP16ri %0, 65535, implicit-def $eflags + ; CMP should not be removed. + CMP16ri %0, 32767, implicit-def $eflags + $bl = SETCCr 15, implicit $eflags + + CMP16ri %0, -32768, implicit-def $eflags + ; CMP should not be removed. + CMP16ri %0, 32767, implicit-def $eflags + $bl = SETCCr 14, implicit $eflags + + CMP16ri %0, 0, implicit-def $eflags + ; should not be removed + CMP16ri %0, 65535, implicit-def $eflags + $bl = SETCCr 4, implicit $eflags + + CMP16ri %0, 0, implicit-def $eflags + ; should not be removed + CMP16ri %0, 65535, implicit-def $eflags + $bl = SETCCr 6, implicit $eflags +... 
diff --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/peep-test-5.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s +; Example of a decref operation with "immortal" objects. +; void decref(int* refcount) { +; int count = *refcount; +; if (count == 1) { free_object(); } +; else if (count > 1) { *refcount = count - 1; } +; else { /* immortal */ } +; } +; Resulting assembly should share flags from single CMP instruction for both +; conditions! +define void @decref(i32* %p) { +; CHECK-LABEL: decref: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb_free +; CHECK-NEXT: callq free_object@PLT +; CHECK-NEXT: .LBB0_4: # %end +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %bb2 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: jle .LBB0_4 +; CHECK-NEXT: # %bb.3: # %bb_dec +; CHECK-NEXT: decl %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %count = load i32, i32* %p, align 4 + %cmp0 = icmp eq i32 %count, 1 + br i1 %cmp0, label %bb_free, label %bb2 + +bb2: + %cmp1 = icmp sgt i32 %count, 1 + br i1 %cmp1, label %bb_dec, label %end + +bb_dec: + %dec = add nsw i32 %count, -1 + store i32 %dec, i32* %p, align 4 + br label %end + +bb_free: + call void @free_object() + br label %end + +end: + ret void +} + +declare void @free_object() diff --git a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll @@ -117,9 +117,8 @@ ; 
CHECK-NEXT: cmpq $1, %rdx ; CHECK-NEXT: jg .LBB3_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: cmovleq %rcx, %rax +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: .LBB3_2: # %return ; CHECK-NEXT: retq @@ -256,9 +255,8 @@ ; CHECK-NEXT: cmpq $1, %rdi ; CHECK-NEXT: jg .LBB8_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: cmovleq %rcx, %rax +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: .LBB8_2: # %return ; CHECK-NEXT: retq @@ -412,9 +410,8 @@ ; CHECK-NEXT: cmpl $1, %eax ; CHECK-NEXT: jg .LBB13_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovlel %eax, %ecx +; CHECK-NEXT: cmovll %eax, %ecx ; CHECK-NEXT: imull %edi, %ecx ; CHECK-NEXT: .LBB13_2: # %return ; CHECK-NEXT: movslq %ecx, %rax @@ -563,9 +560,8 @@ ; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: jg .LBB18_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovlel %eax, %esi +; CHECK-NEXT: cmovll %eax, %esi ; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: .LBB18_2: # %return ; CHECK-NEXT: movslq %esi, %rax