Skip to content

Commit 5ecd81a

Browse files
committedMay 15, 2018
[x86][eflags] Fix PR37431 by teaching the EFLAGS copy lowering to
specially handle SETB_C* pseudo instructions. Summary: While the logic here is somewhat similar to the arithmetic lowering, it is different enough that it made sense to have its own function. I actually tried a bunch of different optimizations here and none worked well so I gave up and just always do the arithmetic based lowering. Looking at code from the PR test case, we actually pessimize a bunch of code when generating these. Because SETB_C* pseudo instructions clobber EFLAGS, we end up creating a bunch of copies of EFLAGS to feed multiple SETB_C* pseudos from a single set of EFLAGS. This in turn causes the lowering code to ruin all the clever code generation that SETB_C* was hoping to achieve. None of this is needed. Whenever we're generating multiple SETB_C* instructions from a single set of EFLAGS we should instead generate a single maximally wide one and extract subregs for all the different desired widths. That would result in substantially better code generation. But this patch doesn't attempt to address that. The test case from the PR is included as well as more directed testing of the specific lowering pattern used for these pseudos. Reviewers: craig.topper Subscribers: sanjoy, mcrosier, llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D46799 llvm-svn: 332389
1 parent ec8598e commit 5ecd81a

File tree

3 files changed

+272
-3
lines changed

3 files changed

+272
-3
lines changed
 

‎llvm/lib/Target/X86/X86FlagsCopyLowering.cpp

+142-3
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,10 @@ class X86FlagsCopyLoweringPass : public MachineFunctionPass {
127127
MachineInstr &JmpI, CondRegArray &CondRegs);
128128
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
129129
MachineInstr &CopyDefI);
130+
void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
131+
MachineBasicBlock::iterator TestPos,
132+
DebugLoc TestLoc, MachineInstr &SetBI,
133+
MachineOperand &FlagUse, CondRegArray &CondRegs);
130134
void rewriteSetCC(MachineBasicBlock &TestMBB,
131135
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
132136
MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -512,8 +516,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
512516
} else if (MI.getOpcode() == TargetOpcode::COPY) {
513517
rewriteCopy(MI, *FlagUse, CopyDefI);
514518
} else {
515-
// We assume that arithmetic instructions that use flags also def
516-
// them.
519+
// We assume all other instructions that use flags also def them.
517520
assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
518521
"Expected a def of EFLAGS for this instruction!");
519522

@@ -525,7 +528,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
525528
// logic.
526529
FlagsKilled = true;
527530

528-
rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
531+
switch (MI.getOpcode()) {
532+
case X86::SETB_C8r:
533+
case X86::SETB_C16r:
534+
case X86::SETB_C32r:
535+
case X86::SETB_C64r:
536+
// Use custom lowering for arithmetic that is merely extending the
537+
// carry flag. We model this as the SETB_C* pseudo instructions.
538+
rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
539+
CondRegs);
540+
break;
541+
542+
default:
543+
// Generically handle remaining uses as arithmetic instructions.
544+
rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
545+
CondRegs);
546+
break;
547+
}
529548
break;
530549
}
531550

@@ -753,6 +772,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
753772
MI.eraseFromParent();
754773
}
755774

775+
void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
776+
MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
777+
DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
778+
CondRegArray &CondRegs) {
779+
// This routine is only used to handle pseudos for setting a register to zero
780+
// or all ones based on CF. This is essentially the sign extended from 1-bit
781+
// form of SETB and modeled with the SETB_C* pseudos. They require special
782+
// handling as they aren't normal SETcc instructions and are lowered to an
783+
// EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
784+
// they are only provided in reg-defining forms. A complicating factor is that
785+
// they can define many different register widths.
786+
assert(SetBI.getOperand(0).isReg() &&
787+
"Cannot have a non-register defined operand to this variant of SETB!");
788+
789+
// Little helper to do the common final step of replacing the register def'ed
790+
// by this SETB instruction with a new register and removing the SETB
791+
// instruction.
792+
auto RewriteToReg = [&](unsigned Reg) {
793+
MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
794+
SetBI.eraseFromParent();
795+
};
796+
797+
// Grab the register class used for this particular instruction.
798+
auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());
799+
800+
MachineBasicBlock &MBB = *SetBI.getParent();
801+
auto SetPos = SetBI.getIterator();
802+
auto SetLoc = SetBI.getDebugLoc();
803+
804+
auto AdjustReg = [&](unsigned Reg) {
805+
auto &OrigRC = *MRI->getRegClass(Reg);
806+
if (&OrigRC == &SetBRC)
807+
return Reg;
808+
809+
unsigned NewReg;
810+
811+
int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
812+
int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
813+
assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
814+
assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
815+
int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
816+
X86::NoSubRegister, X86::sub_32bit};
817+
818+
// If the original size is smaller than the target *and* is smaller than 4
819+
// bytes, we need to explicitly zero extend it. We always extend to 4-bytes
820+
// to maximize the chance of being able to CSE that operation and to avoid
821+
// partial dependency stalls extending to 2-bytes.
822+
if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
823+
NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
824+
BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
825+
.addReg(Reg);
826+
if (&SetBRC == &X86::GR32RegClass)
827+
return NewReg;
828+
Reg = NewReg;
829+
OrigRegSize = 4;
830+
}
831+
832+
NewReg = MRI->createVirtualRegister(&SetBRC);
833+
if (OrigRegSize < TargetRegSize) {
834+
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
835+
NewReg)
836+
.addImm(0)
837+
.addReg(Reg)
838+
.addImm(SubRegIdx[OrigRegSize]);
839+
} else if (OrigRegSize > TargetRegSize) {
840+
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
841+
NewReg)
842+
.addReg(Reg)
843+
.addImm(SubRegIdx[TargetRegSize]);
844+
} else {
845+
BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
846+
.addReg(Reg);
847+
}
848+
return NewReg;
849+
};
850+
851+
unsigned &CondReg = CondRegs[X86::COND_B];
852+
if (!CondReg)
853+
CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);
854+
855+
// Adjust the condition to have the desired register width by zero-extending
856+
// as needed.
857+
// FIXME: We should use a better API to avoid the local reference and using a
858+
// different variable here.
859+
unsigned ExtCondReg = AdjustReg(CondReg);
860+
861+
// Now we need to turn this into a bitmask. We do this by subtracting it from
862+
// zero.
863+
unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
864+
BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
865+
ZeroReg = AdjustReg(ZeroReg);
866+
867+
unsigned Sub;
868+
switch (SetBI.getOpcode()) {
869+
case X86::SETB_C8r:
870+
Sub = X86::SUB8rr;
871+
break;
872+
873+
case X86::SETB_C16r:
874+
Sub = X86::SUB16rr;
875+
break;
876+
877+
case X86::SETB_C32r:
878+
Sub = X86::SUB32rr;
879+
break;
880+
881+
case X86::SETB_C64r:
882+
Sub = X86::SUB64rr;
883+
break;
884+
885+
default:
886+
llvm_unreachable("Invalid SETB_C* opcode!");
887+
}
888+
unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
889+
BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
890+
.addReg(ZeroReg)
891+
.addReg(ExtCondReg);
892+
return RewriteToReg(ResultReg);
893+
}
894+
756895
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
757896
MachineBasicBlock::iterator TestPos,
758897
DebugLoc TestLoc,

‎llvm/test/CodeGen/X86/copy-eflags.ll

+59
Original file line numberDiff line numberDiff line change
@@ -304,3 +304,62 @@ bb1:
304304
%tmp12 = trunc i32 %tmp11 to i16
305305
br label %bb1
306306
}
307+
308+
; Use a particular instruction pattern in order to lower to the post-RA pseudo
309+
; used to lower SETB into an SBB pattern in order to make sure that kind of
310+
; usage of a copied EFLAGS continues to work.
311+
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
312+
; X32-LABEL: PR37431:
313+
; X32: # %bb.0: # %entry
314+
; X32-NEXT: pushl %esi
315+
; X32-NEXT: .cfi_def_cfa_offset 8
316+
; X32-NEXT: .cfi_offset %esi, -8
317+
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
318+
; X32-NEXT: movl (%eax), %eax
319+
; X32-NEXT: movl %eax, %ecx
320+
; X32-NEXT: sarl $31, %ecx
321+
; X32-NEXT: cmpl %eax, %eax
322+
; X32-NEXT: sbbl %ecx, %eax
323+
; X32-NEXT: setb %al
324+
; X32-NEXT: sbbb %cl, %cl
325+
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
326+
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
327+
; X32-NEXT: movb %cl, (%edx)
328+
; X32-NEXT: movzbl %al, %eax
329+
; X32-NEXT: xorl %ecx, %ecx
330+
; X32-NEXT: subl %eax, %ecx
331+
; X32-NEXT: xorl %eax, %eax
332+
; X32-NEXT: xorl %edx, %edx
333+
; X32-NEXT: idivl %ecx
334+
; X32-NEXT: movb %dl, (%esi)
335+
; X32-NEXT: popl %esi
336+
; X32-NEXT: .cfi_def_cfa_offset 4
337+
; X32-NEXT: retl
338+
;
339+
; X64-LABEL: PR37431:
340+
; X64: # %bb.0: # %entry
341+
; X64-NEXT: movq %rdx, %rcx
342+
; X64-NEXT: movslq (%rdi), %rax
343+
; X64-NEXT: cmpq %rax, %rax
344+
; X64-NEXT: sbbb %dl, %dl
345+
; X64-NEXT: cmpq %rax, %rax
346+
; X64-NEXT: movb %dl, (%rsi)
347+
; X64-NEXT: sbbl %esi, %esi
348+
; X64-NEXT: xorl %eax, %eax
349+
; X64-NEXT: xorl %edx, %edx
350+
; X64-NEXT: idivl %esi
351+
; X64-NEXT: movb %dl, (%rcx)
352+
; X64-NEXT: retq
353+
entry:
354+
%tmp = load i32, i32* %arg1
355+
%tmp1 = sext i32 %tmp to i64
356+
%tmp2 = icmp ugt i64 %tmp1, undef
357+
%tmp3 = zext i1 %tmp2 to i8
358+
%tmp4 = sub i8 0, %tmp3
359+
store i8 %tmp4, i8* %arg2
360+
%tmp5 = sext i8 %tmp4 to i32
361+
%tmp6 = srem i32 0, %tmp5
362+
%tmp7 = trunc i32 %tmp6 to i8
363+
store i8 %tmp7, i8* %arg3
364+
ret void
365+
}

‎llvm/test/CodeGen/X86/flags-copy-lowering.mir

+71
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@
6666
call void @foo()
6767
ret void
6868
}
69+
70+
define void @test_setb_c(i64 %a, i64 %b) {
71+
entry:
72+
call void @foo()
73+
ret void
74+
}
6975
...
7076
---
7177
name: test_branch
@@ -482,3 +488,68 @@ body: |
482488
RET 0
483489
484490
...
491+
---
492+
name: test_setb_c
493+
# CHECK-LABEL: name: test_setb_c
494+
liveins:
495+
- { reg: '$rdi', virtual-reg: '%0' }
496+
- { reg: '$rsi', virtual-reg: '%1' }
497+
body: |
498+
bb.0:
499+
liveins: $rdi, $rsi
500+
501+
%0:gr64 = COPY $rdi
502+
%1:gr64 = COPY $rsi
503+
%2:gr64 = ADD64rr %0, %1, implicit-def $eflags
504+
%3:gr64 = COPY $eflags
505+
; CHECK-NOT: COPY{{( killed)?}} $eflags
506+
; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
507+
; CHECK-NOT: COPY{{( killed)?}} $eflags
508+
509+
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
510+
CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
511+
ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
512+
513+
$eflags = COPY %3
514+
%4:gr8 = SETB_C8r implicit-def $eflags, implicit $eflags
515+
MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
516+
; CHECK-NOT: $eflags =
517+
; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
518+
; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
519+
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
520+
; CHECK-NEXT: MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
521+
522+
$eflags = COPY %3
523+
%5:gr16 = SETB_C16r implicit-def $eflags, implicit $eflags
524+
MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %5
525+
; CHECK-NOT: $eflags =
526+
; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
527+
; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
528+
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
529+
; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
530+
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
531+
; CHECK-NEXT: MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
532+
533+
$eflags = COPY %3
534+
%6:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
535+
MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %6
536+
; CHECK-NOT: $eflags =
537+
; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
538+
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
539+
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
540+
; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
541+
542+
$eflags = COPY %3
543+
%7:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
544+
MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7
545+
; CHECK-NOT: $eflags =
546+
; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
547+
; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
548+
; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
549+
; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
550+
; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
551+
; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]
552+
553+
RET 0
554+
555+
...

0 commit comments

Comments
 (0)
Please sign in to comment.