diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -643,7 +643,8 @@ /// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg, Register SrcReg2, int64_t ImmMask, int64_t ImmValue, - const MachineInstr &OI, bool *IsSwapped) const; + const MachineInstr &OI, bool *IsSwapped, + int64_t *ImmDelta) const; }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -4088,8 +4088,8 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg, Register SrcReg2, int64_t ImmMask, int64_t ImmValue, - const MachineInstr &OI, - bool *IsSwapped) const { + const MachineInstr &OI, bool *IsSwapped, + int64_t *ImmDelta) const { switch (OI.getOpcode()) { case X86::CMP64rr: case X86::CMP32rr: @@ -4140,10 +4140,21 @@ int64_t OIMask; int64_t OIValue; if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) && - SrcReg == OISrcReg && ImmMask == OIMask && OIValue == ImmValue) { - assert(SrcReg2 == X86::NoRegister && OISrcReg2 == X86::NoRegister && - "should not have 2nd register"); - return true; + SrcReg == OISrcReg && ImmMask == OIMask) { + if (OIValue == ImmValue) { + *ImmDelta = 0; + return true; + } else if (static_cast(ImmValue) == + static_cast(OIValue) - 1) { + *ImmDelta = -1; + return true; + } else if (static_cast(ImmValue) == + static_cast(OIValue) + 1) { + *ImmDelta = 1; + return true; + } else { + return false; + } } } return FlagI.isIdenticalTo(OI); @@ -4393,6 +4404,7 @@ bool ShouldUpdateCC = false; bool IsSwapped = false; X86::CondCode NewCC = X86::COND_INVALID; + int64_t ImmDelta = 0; // Search backward from CmpInstr for the next instruction defining EFLAGS. 
const TargetRegisterInfo *TRI = &getRegisterInfo(); @@ -4439,7 +4451,7 @@ // ... // EFLAGS not changed // cmp x, y // <-- can be removed if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue, - Inst, &IsSwapped)) { + Inst, &IsSwapped, &ImmDelta)) { Sub = &Inst; break; } @@ -4473,7 +4485,7 @@ // It is safe to remove CmpInstr if EFLAGS is redefined or killed. // If we are done with the basic block, we need to check whether EFLAGS is // live-out. - bool IsSafe = false; + bool FlagsMayLiveOut = true; SmallVector, 4> OpsToUpdate; MachineBasicBlock::iterator AfterCmpInstr = std::next(MachineBasicBlock::iterator(CmpInstr)); @@ -4483,7 +4495,7 @@ // We should check the usage if this instruction uses and updates EFLAGS. if (!UseEFLAGS && ModifyEFLAGS) { // It is safe to remove CmpInstr if EFLAGS is updated again. - IsSafe = true; + FlagsMayLiveOut = false; break; } if (!UseEFLAGS && !ModifyEFLAGS) @@ -4491,7 +4503,7 @@ // EFLAGS is used by this instruction. X86::CondCode OldCC = X86::COND_INVALID; - if (MI || IsSwapped) { + if (MI || IsSwapped || ImmDelta != 0) { // We decode the condition code from opcode. if (Instr.isBranch()) OldCC = X86::getCondFromBranch(Instr); @@ -4545,9 +4557,59 @@ ReplacementCC = getSwappedCondition(OldCC); if (ReplacementCC == X86::COND_INVALID) return false; + ShouldUpdateCC = true; + } else if (ImmDelta != 0) { + unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg)); + // Shift amount for min/max constants to adjust for 8/16/32 instruction + // sizes. 
+ switch (OldCC) { + case X86::COND_L: // x <s (C + 1) --> x <=s C + if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue) + return false; + ReplacementCC = X86::COND_LE; + break; + case X86::COND_B: // x <u (C + 1) --> x <=u C + if (ImmDelta != 1 || CmpValue == 0) + return false; + ReplacementCC = X86::COND_BE; + break; + case X86::COND_GE: // x >=s (C + 1) --> x >s C + if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue) + return false; + ReplacementCC = X86::COND_G; + break; + case X86::COND_AE: // x >=u (C + 1) --> x >u C + if (ImmDelta != 1 || CmpValue == 0) + return false; + ReplacementCC = X86::COND_A; + break; + case X86::COND_G: // x >s (C - 1) --> x >=s C + if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue) + return false; + ReplacementCC = X86::COND_GE; + break; + case X86::COND_A: // x >u (C - 1) --> x >=u C + if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue) + return false; + ReplacementCC = X86::COND_AE; + break; + case X86::COND_LE: // x <=s (C - 1) --> x x isLiveIn(X86::EFLAGS)) return false; diff --git a/llvm/test/CodeGen/X86/optimize-compare.mir b/llvm/test/CodeGen/X86/optimize-compare.mir --- a/llvm/test/CodeGen/X86/optimize-compare.mir +++ b/llvm/test/CodeGen/X86/optimize-compare.mir @@ -379,3 +379,361 @@ CMP64ri32 %0, 24, implicit-def $eflags $cl = SETCCr 3, implicit $eflags ... +--- +name: opt_redundant_flags_adjusted_imm_0 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_0 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri8 [[COPY]], 1, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 15, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 7, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 14, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 6, implicit $eflags + %0:gr64 = COPY $rsi + ; CMP+SETCC %0 == 1 + CMP64ri8 %0, 1, implicit-def $eflags + $cl = SETCCr 4, implicit $eflags + ; CMP+SETCC %0 >= 2; CMP can be removed. 
+ CMP64ri8 %0, 2, implicit-def $eflags + ; %0 >=s 2 --> %0 >s 1 + $bl = SETCCr 13, implicit $eflags + ; %0 >=u 2 --> %0 >u 1 + $bl = SETCCr 3, implicit $eflags + ; %0 <s 2 --> %0 <=s 1 + $bl = SETCCr 12, implicit $eflags + ; %0 <u 2 --> %0 <=u 1 + $bl = SETCCr 2, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_1 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_1 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri8 [[COPY]], 42, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 5, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 3, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 12, implicit $eflags + ; CHECK-NEXT: $bl = SETCCr 2, implicit $eflags + %0:gr64 = COPY $rsi + ; CMP+SETCC %0 != 42 + CMP64ri8 %0, 42, implicit-def $eflags + $cl = SETCCr 5, implicit $eflags + ; CMP+SETCC %0 > 41; CMP can be removed. + CMP64ri8 %0, 41, implicit-def $eflags + ; %0 >s 41 --> %0 >=s 42 + $bl = SETCCr 15, implicit $eflags + ; %0 >u 41 --> %0 >=u 42 + $bl = SETCCr 7, implicit $eflags + ; %0 <=s 41 --> %0 %0 =u 1) + CMP8ri %0, 1, implicit-def $eflags + $cl = SETCCr 3, implicit $eflags + + ; CMP should be removed (%0 <=s -1) + CMP8ri %0, -1, implicit-def $eflags + $cl = SETCCr 14, implicit $eflags +... 
+--- +name: opt_redundant_flags_adjusted_imm_cmp_test +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_cmp_test + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri32 [[COPY]], 1, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 13, implicit $eflags + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $edi + ; CHECK-NEXT: CMP64ri32 [[COPY1]], -1, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 14, implicit $eflags + %0:gr64 = COPY $rsi + CMP64ri32 %0, 1, implicit-def $eflags + ; TEST should be removed + TEST64rr %0, %0, implicit-def $eflags + $cl = SETCCr 15, implicit $eflags + + %1:gr64 = COPY $edi + CMP64ri32 %1, -1, implicit-def $eflags + ; TEST should be removed + TEST64rr %1, %1, implicit-def $eflags + $cl = SETCCr 12, implicit $eflags +... +--- +name: opt_redundant_flags_adjusted_imm_noopt_0 +body: | + bb.0: + ; CHECK-LABEL: name: opt_redundant_flags_adjusted_imm_noopt_0 + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: CMP64ri8 [[COPY]], 42, implicit-def $eflags + ; CHECK-NEXT: $cl = SETCCr 4, implicit $eflags + ; CHECK-NEXT: CMP64ri8 [[COPY]], 41, implicit-def $eflags + ; CHECK-NEXT: $bl = SETCCr 4, implicit $eflags + %0:gr64 = COPY $rsi + ; CMP+SETCC %0 s INT16_MAX + $bl = SETCCr 15, implicit $eflags + + CMP16ri %0, 65535, implicit-def $eflags + ; CMP should not be removed. + CMP16ri %0, 32767, implicit-def $eflags + $bl = SETCCr 15, implicit $eflags + + CMP16ri %0, -32768, implicit-def $eflags + ; CMP should not be removed. + CMP16ri %0, 32767, implicit-def $eflags + $bl = SETCCr 14, implicit $eflags + + CMP16ri %0, 0, implicit-def $eflags + ; should not be removed + CMP16ri %0, 65535, implicit-def $eflags + $bl = SETCCr 4, implicit $eflags + + CMP16ri %0, 0, implicit-def $eflags + ; should not be removed + CMP16ri %0, 65535, implicit-def $eflags + $bl = SETCCr 6, implicit $eflags +... 
+--- +name: opt_adjusted_imm_multiple_blocks +body: | + ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: CMP32ri [[COPY]], 20, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: JCC_1 %bb.2, 15, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET 0 + bb.0: + %0:gr32 = COPY $eax + CMP32ri %0, 20, implicit-def $eflags + JCC_1 %bb.1, 4, implicit $eflags + JMP_1 %bb.3 + + bb.1: + ; CMP can be removed when adjusting the JCC. + CMP32ri %0, 21, implicit-def $eflags + JCC_1 %bb.2, 13, implicit $eflags + JMP_1 %bb.3 + + bb.2: + JMP_1 %bb.3 + + bb.3: + RET 0 +... 
+--- +name: opt_adjusted_imm_multiple_blocks_noopt +body: | + ; CHECK-LABEL: name: opt_adjusted_imm_multiple_blocks_noopt + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $eax + ; CHECK-NEXT: CMP32ri [[COPY]], 20, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.1, 4, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CMP32ri [[COPY]], 21, implicit-def $eflags + ; CHECK-NEXT: JCC_1 %bb.2, 13, implicit $eflags + ; CHECK-NEXT: JMP_1 %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: RET 0 + bb.0: + %0:gr32 = COPY $eax + CMP32ri %0, 20, implicit-def $eflags + JCC_1 %bb.1, 4, implicit $eflags + JMP_1 %bb.3 + + bb.1: + ; The following CMP should not be optimized because $eflags is live-out + CMP32ri %0, 21, implicit-def $eflags + JCC_1 %bb.2, 13, implicit $eflags + JMP_1 %bb.3 + + bb.2: + liveins: $eflags + $al = SETCCr 4, implicit $eflags + + bb.3: + RET 0 +... +--- +name: opt_shift_cmp_zero +body: | + bb.0: + ; CHECK-LABEL: name: opt_shift_cmp_zero + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[SHL64ri:%[0-9]+]]:gr64 = SHL64ri [[COPY]], 7, implicit-def $eflags + ; CHECK-NEXT: $al = SETCCr 4, implicit $eflags + %0:gr64 = COPY $rsi + %1:gr64 = SHL64ri %0, 7, implicit-def dead $eflags + ; TEST should be removed. + TEST64rr %1, %1, implicit-def $eflags + $al = SETCCr 4, implicit $eflags +... 
+--- +name: noopt_shift_cmp_zero +body: | + bb.0: + ; CHECK-LABEL: name: noopt_shift_cmp_zero + ; CHECK: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[SHL64ri:%[0-9]+]]:gr64 = SHL64ri [[COPY]], 9, implicit-def dead $eflags + ; CHECK-NEXT: TEST64rr [[SHL64ri]], [[SHL64ri]], implicit-def $eflags + ; CHECK-NEXT: $al = SETCCr 14, implicit $eflags + %0:gr64 = COPY $rsi + %1:gr64 = SHL64ri %0, 9, implicit-def dead $eflags + ; TEST cannot be removed if a user relies on the OF flag. + TEST64rr %1, %1, implicit-def $eflags + $al = SETCCr 14, implicit $eflags +... +--- +name: noopt_shift_cmp_zero_multiblock +body: | + ; CHECK-LABEL: name: noopt_shift_cmp_zero_multiblock + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rsi + ; CHECK-NEXT: [[SHL64ri:%[0-9]+]]:gr64 = SHL64ri [[COPY]], 9, implicit-def dead $eflags + ; CHECK-NEXT: TEST64rr [[SHL64ri]], [[SHL64ri]], implicit-def $eflags + ; CHECK-NEXT: JMP_1 %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: $al = SETCCr 14, implicit $eflags + bb.0: + %0:gr64 = COPY $rsi + %1:gr64 = SHL64ri %0, 9, implicit-def dead $eflags + ; TEST cannot be removed if a user relies on the OF flag. + TEST64rr %1, %1, implicit-def $eflags + JMP_1 %bb.1 + + bb.1: + liveins: $eflags + $al = SETCCr 14, implicit $eflags +... diff --git a/llvm/test/CodeGen/X86/peep-test-5.ll b/llvm/test/CodeGen/X86/peep-test-5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/peep-test-5.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -o - %s -mtriple=x86_64-- | FileCheck %s +; Example of a decref operation with "immortal" objects. 
+; void decref(long* refcount) { +; long count = *refcount; +; if (count == 1) { free_object() } +; else if (count > 1) { *refcount = count - 1; } +; else { /* immortal */ } +; } +; Resulting assembly should share flags from single CMP instruction for both +; conditions! +define void @decref(i32* %p) { +; CHECK-LABEL: decref: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: cmpl $1, %eax +; CHECK-NEXT: jne .LBB0_2 +; CHECK-NEXT: # %bb.1: # %bb_free +; CHECK-NEXT: callq free_object@PLT +; CHECK-NEXT: .LBB0_4: # %end +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %bb2 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: jle .LBB0_4 +; CHECK-NEXT: # %bb.3: # %bb_dec +; CHECK-NEXT: decl %eax +; CHECK-NEXT: movl %eax, (%rdi) +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq + %count = load i32, i32* %p, align 4 + %cmp0 = icmp eq i32 %count, 1 + br i1 %cmp0, label %bb_free, label %bb2 + +bb2: + %cmp1 = icmp sgt i32 %count, 1 + br i1 %cmp1, label %bb_dec, label %end + +bb_dec: + %dec = add nsw i32 %count, -1 + store i32 %dec, i32* %p, align 4 + br label %end + +bb_free: + call void @free_object() + br label %end + +end: + ret void +} + +declare void @free_object() diff --git a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll @@ -117,9 +117,8 @@ ; CHECK-NEXT: cmpq $1, %rdx ; CHECK-NEXT: jg .LBB3_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: cmovleq %rcx, %rax +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: .LBB3_2: # %return ; CHECK-NEXT: retq @@ -256,9 +255,8 @@ ; CHECK-NEXT: cmpq $1, %rdi ; CHECK-NEXT: jg .LBB8_2 ; CHECK-NEXT: # 
%bb.1: # %if.end -; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: cmovleq %rcx, %rax +; CHECK-NEXT: cmovlq %rcx, %rax ; CHECK-NEXT: imulq %rdi, %rax ; CHECK-NEXT: .LBB8_2: # %return ; CHECK-NEXT: retq @@ -412,9 +410,8 @@ ; CHECK-NEXT: cmpl $1, %eax ; CHECK-NEXT: jg .LBB13_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovlel %eax, %ecx +; CHECK-NEXT: cmovll %eax, %ecx ; CHECK-NEXT: imull %edi, %ecx ; CHECK-NEXT: .LBB13_2: # %return ; CHECK-NEXT: movslq %ecx, %rax @@ -563,9 +560,8 @@ ; CHECK-NEXT: cmpl $1, %edi ; CHECK-NEXT: jg .LBB18_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovlel %eax, %esi +; CHECK-NEXT: cmovll %eax, %esi ; CHECK-NEXT: imull %edi, %esi ; CHECK-NEXT: .LBB18_2: # %return ; CHECK-NEXT: movslq %esi, %rax