Skip to content

Commit 594f963

Browse files
committedMay 14, 2014
X86: If we have an instruction that sets a flag and a zero test on the input of that instruction try to eliminate the test.
For example tzcntl %edi, %ebx testl %edi, %edi je .label can be rewritten into tzcntl %edi, %ebx jb .label A minor complication is that tzcnt sets CF instead of ZF when the input is zero, we have to rewrite users of the flags from ZF to CF. Currently we recognize patterns using lzcnt, tzcnt and popcnt. Differential Revision: http://reviews.llvm.org/D3454 llvm-svn: 208788
1 parent 47c2f84 commit 594f963

File tree

2 files changed

+138
-4
lines changed

2 files changed

+138
-4
lines changed
 

‎llvm/lib/Target/X86/X86InstrInfo.cpp

+63-3
Original file line numberDiff line numberDiff line change
@@ -3559,6 +3559,26 @@ inline static bool isDefConvertible(MachineInstr *MI) {
35593559
}
35603560
}
35613561

3562+
/// isUseDefConvertible - check whether the use can be converted
3563+
/// to remove a comparison against zero.
3564+
static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
3565+
switch (MI->getOpcode()) {
3566+
default: return X86::COND_INVALID;
3567+
case X86::LZCNT16rr: case X86::LZCNT16rm:
3568+
case X86::LZCNT32rr: case X86::LZCNT32rm:
3569+
case X86::LZCNT64rr: case X86::LZCNT64rm:
3570+
return X86::COND_B;
3571+
case X86::POPCNT16rr:case X86::POPCNT16rm:
3572+
case X86::POPCNT32rr:case X86::POPCNT32rm:
3573+
case X86::POPCNT64rr:case X86::POPCNT64rm:
3574+
return X86::COND_E;
3575+
case X86::TZCNT16rr: case X86::TZCNT16rm:
3576+
case X86::TZCNT32rr: case X86::TZCNT32rm:
3577+
case X86::TZCNT64rr: case X86::TZCNT64rm:
3578+
return X86::COND_B;
3579+
}
3580+
}
3581+
35623582
/// optimizeCompareInstr - Check if there exists an earlier instruction that
35633583
/// operates on the same source operands and sets flags in the same way as
35643584
/// Compare; remove Compare if possible.
@@ -3625,10 +3645,35 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
36253645
// If we are comparing against zero, check whether we can use MI to update
36263646
// EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
36273647
bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
3628-
if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() ||
3629-
!isDefConvertible(MI)))
3648+
if (IsCmpZero && MI->getParent() != CmpInstr->getParent())
36303649
return false;
36313650

3651+
// If we have a use of the source register between the def and our compare
3652+
// instruction we can eliminate the compare iff the use sets EFLAGS in the
3653+
// right way.
3654+
bool ShouldUpdateCC = false;
3655+
X86::CondCode NewCC = X86::COND_INVALID;
3656+
if (IsCmpZero && !isDefConvertible(MI)) {
3657+
// Scan forward from the use until we hit the use we're looking for or the
3658+
// compare instruction.
3659+
for (MachineBasicBlock::iterator J = MI;; ++J) {
3660+
// Do we have a convertible instruction?
3661+
NewCC = isUseDefConvertible(J);
3662+
if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
3663+
J->getOperand(1).getReg() == SrcReg) {
3664+
assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
3665+
ShouldUpdateCC = true; // Update CC later on.
3666+
// This is not a def of SrcReg, but still a def of EFLAGS. Keep going
3667+
// with the new def.
3668+
MI = Def = J;
3669+
break;
3670+
}
3671+
3672+
if (J == I)
3673+
return false;
3674+
}
3675+
}
3676+
36323677
// We are searching for an earlier instruction that can make CmpInstr
36333678
// redundant and that instruction will be saved in Sub.
36343679
MachineInstr *Sub = nullptr;
@@ -3726,13 +3771,28 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
37263771
// CF and OF are used, we can't perform this optimization.
37273772
return false;
37283773
}
3774+
3775+
// If we're updating the condition code check if we have to reverse the
3776+
// condition.
3777+
if (ShouldUpdateCC)
3778+
switch (OldCC) {
3779+
default:
3780+
return false;
3781+
case X86::COND_E:
3782+
break;
3783+
case X86::COND_NE:
3784+
NewCC = GetOppositeBranchCondition(NewCC);
3785+
break;
3786+
}
37293787
} else if (IsSwapped) {
37303788
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
37313789
// to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
37323790
// We swap the condition code and synthesize the new opcode.
3733-
X86::CondCode NewCC = getSwappedCondition(OldCC);
3791+
NewCC = getSwappedCondition(OldCC);
37343792
if (NewCC == X86::COND_INVALID) return false;
3793+
}
37353794

3795+
if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) {
37363796
// Synthesize the new opcode.
37373797
bool HasMemoryOperand = Instr.hasOneMemOperand();
37383798
unsigned NewOpc;

‎llvm/test/CodeGen/X86/peep-test-4.ll

+75-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt | FileCheck %s
1+
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+bmi,+bmi2,+popcnt,+lzcnt | FileCheck %s
22
declare void @foo(i32)
3+
declare void @foo32(i32)
34
declare void @foo64(i64)
45

56
; CHECK-LABEL: neg:
@@ -189,3 +190,76 @@ bb:
189190
return:
190191
ret void
191192
}
193+
194+
; CHECK-LABEL: testCTZ
195+
; CHECK: tzcntq
196+
; CHECK-NOT: test
197+
; CHECK: cmovaeq
198+
declare i64 @llvm.cttz.i64(i64, i1)
199+
define i64 @testCTZ(i64 %v) nounwind {
200+
%cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
201+
%tobool = icmp eq i64 %v, 0
202+
%cond = select i1 %tobool, i64 255, i64 %cnt
203+
ret i64 %cond
204+
}
205+
206+
; CHECK-LABEL: testCTZ2
207+
; CHECK: tzcntl
208+
; CHECK-NEXT: jb
209+
; CHECK: jmp foo
210+
declare i32 @llvm.cttz.i32(i32, i1)
211+
define void @testCTZ2(i32 %v) nounwind {
212+
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
213+
%cmp = icmp eq i32 %v, 0
214+
br i1 %cmp, label %return, label %bb
215+
216+
bb:
217+
tail call void @foo(i32 %cnt)
218+
br label %return
219+
220+
return:
221+
tail call void @foo32(i32 %cnt)
222+
ret void
223+
}
224+
225+
; CHECK-LABEL: testCTZ3
226+
; CHECK: tzcntl
227+
; CHECK-NEXT: jae
228+
; CHECK: jmp foo
229+
define void @testCTZ3(i32 %v) nounwind {
230+
%cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
231+
%cmp = icmp ne i32 %v, 0
232+
br i1 %cmp, label %return, label %bb
233+
234+
bb:
235+
tail call void @foo(i32 %cnt)
236+
br label %return
237+
238+
return:
239+
tail call void @foo32(i32 %cnt)
240+
ret void
241+
}
242+
243+
; CHECK-LABEL: testCLZ
244+
; CHECK: lzcntq
245+
; CHECK-NOT: test
246+
; CHECK: cmovaeq
247+
declare i64 @llvm.ctlz.i64(i64, i1)
248+
define i64 @testCLZ(i64 %v) nounwind {
249+
%cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
250+
%tobool = icmp ne i64 %v, 0
251+
%cond = select i1 %tobool, i64 %cnt, i64 255
252+
ret i64 %cond
253+
}
254+
255+
; CHECK-LABEL: testPOPCNT
256+
; CHECK: popcntq
257+
; CHECK-NOT: test
258+
; CHECK: cmovneq
259+
declare i64 @llvm.ctpop.i64(i64)
260+
define i64 @testPOPCNT(i64 %v) nounwind {
261+
%cnt = tail call i64 @llvm.ctpop.i64(i64 %v)
262+
%tobool = icmp ne i64 %v, 0
263+
%cond = select i1 %tobool, i64 %cnt, i64 255
264+
ret i64 %cond
265+
}

0 commit comments

Comments
 (0)
Please sign in to comment.