Skip to content

Commit 1ef6c59

Browse files
committedJul 6, 2016
[X86] Transform setcc + movzbl into xorl + setcc
xorl + setcc is generally the preferred sequence due to the partial register stall setcc + movzbl suffers from. As a bonus, it also encodes one byte smaller. This fixes PR28146. Differential Revision: http://reviews.llvm.org/D21774 llvm-svn: 274692
1 parent 0a53fdf commit 1ef6c59

32 files changed

+523
-288
lines changed
 

‎llvm/lib/Target/X86/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ set(sources
1919
X86FastISel.cpp
2020
X86FixupBWInsts.cpp
2121
X86FixupLEAs.cpp
22+
X86FixupSetCC.cpp
2223
X86FloatingPoint.cpp
2324
X86FrameLowering.cpp
2425
X86ISelDAGToDAG.cpp

‎llvm/lib/Target/X86/X86.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ FunctionPass *createX86FixupLEAs();
5959
/// recalculations.
6060
FunctionPass *createX86OptimizeLEAs();
6161

62+
/// Return a pass that transforms setcc + movzx pairs into xor + setcc.
63+
FunctionPass *createX86FixupSetCC();
64+
6265
/// Return a pass that expands WinAlloca pseudo-instructions.
6366
FunctionPass *createX86WinAllocaExpander();
6467

‎llvm/lib/Target/X86/X86FixupSetCC.cpp

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
//===---- X86FixupSetCC.cpp - fix zero-extension of setcc patterns --------===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// This file defines a pass that fixes zero-extension of setcc patterns.
11+
// X86 setcc instructions are modeled to have no input arguments, and a single
12+
// GR8 output argument. This is consistent with other similar instructions
13+
// (e.g. movb), but means it is impossible to directly generate a setcc into
14+
// the lower GR8 of a specified GR32.
15+
// This means that ISel must select (zext (setcc)) into something like
16+
// seta %al; movzbl %al, %eax.
17+
// Unfortunately, this can cause a stall due to the partial register write
18+
// performed by the setcc. Instead, we can use:
19+
// xor %eax, %eax; seta %al
20+
// This both avoids the stall, and encodes shorter.
21+
//===----------------------------------------------------------------------===//
22+
23+
#include "X86.h"
24+
#include "X86InstrInfo.h"
25+
#include "X86Subtarget.h"
26+
#include "llvm/ADT/Statistic.h"
27+
#include "llvm/CodeGen/MachineFunctionPass.h"
28+
#include "llvm/CodeGen/MachineInstrBuilder.h"
29+
#include "llvm/CodeGen/MachineRegisterInfo.h"
30+
31+
using namespace llvm;
32+
33+
#define DEBUG_TYPE "x86-fixup-setcc"
34+
35+
STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted");
36+
37+
namespace {
38+
class X86FixupSetCCPass : public MachineFunctionPass {
39+
public:
40+
X86FixupSetCCPass() : MachineFunctionPass(ID) {}
41+
42+
const char *getPassName() const override { return "X86 Fixup SetCC"; }
43+
44+
bool runOnMachineFunction(MachineFunction &MF) override;
45+
46+
private:
47+
// Find the preceding instruction that imp-defs eflags.
48+
MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB,
49+
MachineBasicBlock::reverse_iterator MI);
50+
51+
// Return true if MI imp-uses eflags.
52+
bool impUsesFlags(MachineInstr *MI);
53+
54+
// Return true if this is the opcode of a SetCC instruction with a register
55+
// output.
56+
bool isSetCCr(unsigned Opode);
57+
58+
MachineRegisterInfo *MRI;
59+
const X86InstrInfo *TII;
60+
61+
enum { SearchBound = 16 };
62+
63+
static char ID;
64+
};
65+
66+
char X86FixupSetCCPass::ID = 0;
67+
}
68+
69+
/// Return a newly allocated instance of the setcc + movzx fixup pass.
FunctionPass *llvm::createX86FixupSetCC() {
  return new X86FixupSetCCPass();
}
70+
71+
bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) {
72+
switch (Opcode) {
73+
default:
74+
return false;
75+
case X86::SETOr:
76+
case X86::SETNOr:
77+
case X86::SETBr:
78+
case X86::SETAEr:
79+
case X86::SETEr:
80+
case X86::SETNEr:
81+
case X86::SETBEr:
82+
case X86::SETAr:
83+
case X86::SETSr:
84+
case X86::SETNSr:
85+
case X86::SETPr:
86+
case X86::SETNPr:
87+
case X86::SETLr:
88+
case X86::SETGEr:
89+
case X86::SETLEr:
90+
case X86::SETGr:
91+
return true;
92+
}
93+
}
94+
95+
// We expect the instruction *immediately* before the setcc to imp-def
96+
// EFLAGS (because of scheduling glue). To make this less brittle w.r.t
97+
// scheduling, look backwards until we hit the beginning of the
98+
// basic-block, or a small bound (to avoid quadratic behavior).
99+
MachineInstr *
100+
X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
101+
MachineBasicBlock::reverse_iterator MI) {
102+
auto MBBStart = MBB->instr_rend();
103+
for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
104+
for (auto &Op : MI->implicit_operands())
105+
if ((Op.getReg() == X86::EFLAGS) && (Op.isDef()))
106+
return &*MI;
107+
108+
return nullptr;
109+
}
110+
111+
// Return true if MI has an implicit operand that is a use of EFLAGS.
bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) {
  for (auto &Op : MI->implicit_operands())
    // Implicit operands may be non-register operands (for example register
    // masks); getReg() is only valid on register operands, so check isReg()
    // first.
    if (Op.isReg() && (Op.getReg() == X86::EFLAGS) && Op.isUse())
      return true;

  return false;
}
118+
119+
// Walk every instruction of MF looking for a register setcc whose result is
// used by a MOVZX32rr8. For each match, a zeroed 32-bit register is created
// before the instruction that defines EFLAGS, the setcc result is inserted
// into its low byte, all uses of the zext result are redirected to that
// register, and the zext is erased. Returns true if anything was rewritten.
bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  MRI = &MF.getRegInfo();
  TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();

  // Zexts are erased only after the walk so that no instruction is removed
  // while the blocks are still being iterated.
  SmallVector<MachineInstr*, 4> ToErase;

  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      // Find a setcc that is used by a zext.
      // This doesn't have to be the only use, the transformation is safe
      // regardless.
      if (!isSetCCr(MI.getOpcode()))
        continue;

      // If several zexts use the setcc result, the last one visited is the
      // one that gets rewritten.
      MachineInstr *ZExt = nullptr;
      for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg()))
        if (Use.getOpcode() == X86::MOVZX32rr8)
          ZExt = &Use;

      if (!ZExt)
        continue;

      // Find the preceding instruction that imp-defs eflags.
      MachineInstr *FlagsDefMI = findFlagsImpDef(
          MI.getParent(), MachineBasicBlock::reverse_iterator(&MI));
      if (!FlagsDefMI)
        continue;

      // We'd like to put something that clobbers eflags directly before
      // FlagsDefMI. This can't hurt anything after FlagsDefMI, because
      // it, itself, by definition, clobbers eflags. But it may happen that
      // FlagsDefMI also *uses* eflags, in which case the transformation is
      // invalid.
      if (impUsesFlags(FlagsDefMI))
        continue;

      ++NumSubstZexts;
      Changed = true;

      // Start from the register class of the zext result. On 32-bit targets
      // only the A/B/C/D registers have a low-byte subregister that can be
      // encoded, so force the ABCD class there; otherwise the sub_8bit insert
      // below could end up on a register without a usable 8-bit part.
      const TargetRegisterClass *RC =
          MRI->getRegClass(ZExt->getOperand(0).getReg());
      if (!MF.getSubtarget<X86Subtarget>().is64Bit())
        RC = &X86::GR32_ABCDRegClass;
      unsigned ZeroReg = MRI->createVirtualRegister(RC);
      unsigned InsertReg = MRI->createVirtualRegister(RC);

      // Initialize a register with 0. This must go before the eflags def
      BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0),
              ZeroReg);

      // X86 setcc only takes an output GR8, so fake a GR32 input by inserting
      // the setcc result into the low byte of the zeroed register.
      BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(),
              TII->get(X86::INSERT_SUBREG), InsertReg)
          .addReg(ZeroReg)
          .addReg(MI.getOperand(0).getReg())
          .addImm(X86::sub_8bit);
      MRI->replaceRegWith(ZExt->getOperand(0).getReg(), InsertReg);
      ToErase.push_back(ZExt);
    }
  }

  for (auto &I : ToErase)
    I->eraseFromParent();

  return Changed;
}

‎llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,6 @@ bool X86PassConfig::addInstSelector() {
285285
addPass(createCleanupLocalDynamicTLSPass());
286286

287287
addPass(createX86GlobalBaseRegPass());
288-
289288
return false;
290289
}
291290

@@ -305,6 +304,8 @@ bool X86PassConfig::addPreISel() {
305304
}
306305

307306
void X86PassConfig::addPreRegAlloc() {
307+
addPass(createX86FixupSetCC());
308+
308309
if (getOptLevel() != CodeGenOpt::None)
309310
addPass(createX86OptimizeLEAs());
310311

‎llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movzbl
1+
; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xorl
22

33
define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind {
44
entry:

‎llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ entry:
1010
; SOURCE-SCHED: subl
1111
; SOURCE-SCHED: movl
1212
; SOURCE-SCHED: sarl
13+
; SOURCE-SCHED: xorl
1314
; SOURCE-SCHED: cmpl
1415
; SOURCE-SCHED: setg
15-
; SOURCE-SCHED: movzbl
1616
; SOURCE-SCHED: movb
1717
; SOURCE-SCHED: xorl
1818
; SOURCE-SCHED: subl

0 commit comments

Comments
 (0)
Please sign in to comment.