Diff 146904

llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp

Show First 20 Lines • Show All 121 Lines • ▼ Show 20 Lines	void rewriteCMov(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,		MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
MachineInstr &CMovI, MachineOperand &FlagUse,		MachineInstr &CMovI, MachineOperand &FlagUse,
CondRegArray &CondRegs);		CondRegArray &CondRegs);
void rewriteCondJmp(MachineBasicBlock &TestMBB,		void rewriteCondJmp(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,		MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
MachineInstr &JmpI, CondRegArray &CondRegs);		MachineInstr &JmpI, CondRegArray &CondRegs);
void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,		void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
MachineInstr &CopyDefI);		MachineInstr &CopyDefI);
		void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
		MachineBasicBlock::iterator TestPos,
		DebugLoc TestLoc, MachineInstr &SetBI,
		MachineOperand &FlagUse, CondRegArray &CondRegs);
void rewriteSetCC(MachineBasicBlock &TestMBB,		void rewriteSetCC(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,		MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
MachineInstr &SetCCI, MachineOperand &FlagUse,		MachineInstr &SetCCI, MachineOperand &FlagUse,
CondRegArray &CondRegs);		CondRegArray &CondRegs);
};		};

} // end anonymous namespace		} // end anonymous namespace

▲ Show 20 Lines • Show All 369 Lines • ▼ Show 20 Lines	do {
if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {		if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);		rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
} else if (X86::getCondFromSETOpc(MI.getOpcode()) !=		} else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
X86::COND_INVALID) {		X86::COND_INVALID) {
rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);		rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
} else if (MI.getOpcode() == TargetOpcode::COPY) {		} else if (MI.getOpcode() == TargetOpcode::COPY) {
rewriteCopy(MI, *FlagUse, CopyDefI);		rewriteCopy(MI, *FlagUse, CopyDefI);
} else {		} else {
// We assume that arithmetic instructions that use flags also def		// We assume all other instructions that use flags also def them.
// them.
assert(MI.findRegisterDefOperand(X86::EFLAGS) &&		assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
"Expected a def of EFLAGS for this instruction!");		"Expected a def of EFLAGS for this instruction!");

// NB!!! Several arithmetic instructions only partially update		// NB!!! Several arithmetic instructions only partially update
// flags. Theoretically, we could generate MI code sequences that		// flags. Theoretically, we could generate MI code sequences that
// would rely on this fact and observe different flags independently.		// would rely on this fact and observe different flags independently.
// But currently LLVM models all of these instructions as clobbering		// But currently LLVM models all of these instructions as clobbering
// all the flags in an undef way. We rely on that to simplify the		// all the flags in an undef way. We rely on that to simplify the
// logic.		// logic.
FlagsKilled = true;		FlagsKilled = true;

rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);		switch (MI.getOpcode()) {
		case X86::SETB_C8r:
		case X86::SETB_C16r:
		case X86::SETB_C32r:
		case X86::SETB_C64r:
		// Use custom lowering for arithmetic that is merely extending the
		// carry flag. We model this as the SETB_C* pseudo instructions.
		rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
		CondRegs);
		break;

		default:
		// Generically handle remaining uses as arithmetic instructions.
		rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
		CondRegs);
		break;
		}
break;		break;
}		}

// If this was the last use of the flags, we're done.		// If this was the last use of the flags, we're done.
if (FlagsKilled)		if (FlagsKilled)
break;		break;
}		}

▲ Show 20 Lines • Show All 211 Lines • ▼ Show 20 Lines	void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
MachineOperand &FlagUse,		MachineOperand &FlagUse,
MachineInstr &CopyDefI) {		MachineInstr &CopyDefI) {
// Just replace this copy with the original copy def.		// Just replace this copy with the original copy def.
MRI->replaceRegWith(MI.getOperand(0).getReg(),		MRI->replaceRegWith(MI.getOperand(0).getReg(),
CopyDefI.getOperand(0).getReg());		CopyDefI.getOperand(0).getReg());
MI.eraseFromParent();		MI.eraseFromParent();
}		}

		void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
		MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
		DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
		CondRegArray &CondRegs) {
		// This routine is only used to handle pseudos for setting a register to zero
		// or all ones based on CF. This is essentially the sign extended from 1-bit
		// form of SETB and modeled with the SETB_C* pseudos. They require special
		// handling as they aren't normal SETcc instructions and are lowered to an
		// EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
		// they are only provided in reg-defining forms. A complicating factor is that
		// they can define many different register widths.
		assert(SetBI.getOperand(0).isReg() &&
		"Cannot have a non-register defined operand to this variant of SETB!");

		// Little helper to do the common final step of replacing the register def'ed
		// by this SETB instruction with a new register and removing the SETB
		// instruction.
		auto RewriteToReg = [&](unsigned Reg) {
		MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
		SetBI.eraseFromParent();
		};

		// Grab the register class used for this particular instruction.
		auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());

		MachineBasicBlock &MBB = *SetBI.getParent();
		auto SetPos = SetBI.getIterator();
		auto SetLoc = SetBI.getDebugLoc();

		auto AdjustReg = [&](unsigned Reg) {
		auto &OrigRC = *MRI->getRegClass(Reg);
		if (&OrigRC == &SetBRC)
		return Reg;

		unsigned NewReg;

		int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
		int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
		assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
		assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
		int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
		X86::NoSubRegister, X86::sub_32bit};

		// If the original size is smaller than the target and is smaller than 4
		// bytes, we need to explicitly zero extend it. We always extend to 4-bytes
		// to maximize the chance of being able to CSE that operation and to avoid
		// partial dependency stalls extending to 2-bytes.
		if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
		NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
		BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
		.addReg(Reg);
		if (&SetBRC == &X86::GR32RegClass)
		return NewReg;
		Reg = NewReg;
		OrigRegSize = 4;
		}

		NewReg = MRI->createVirtualRegister(&SetBRC);
		if (OrigRegSize < TargetRegSize) {
		BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
		NewReg)
		.addImm(0)
		.addReg(Reg)
		.addImm(SubRegIdx[OrigRegSize]);
		} else if (OrigRegSize > TargetRegSize) {
		BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
		NewReg)
		.addReg(Reg)
		.addImm(SubRegIdx[TargetRegSize]);
		} else {
		BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
		.addReg(Reg);
		}
		return NewReg;
		};

		unsigned &CondReg = CondRegs[X86::COND_B];
		if (!CondReg)
		CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);

		// Adjust the condition to have the desired register width by zero-extending
		// as needed.
		// FIXME: We should use a better API to avoid the local reference and using a
		// different variable here.
		unsigned ExtCondReg = AdjustReg(CondReg);

		// Now we need to turn this into a bitmask. We do this by subtracting it from
		// zero.
		unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
		BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
		ZeroReg = AdjustReg(ZeroReg);

		unsigned Sub;
		switch (SetBI.getOpcode()) {
		case X86::SETB_C8r:
		Sub = X86::SUB8rr;
		break;

		case X86::SETB_C16r:
		Sub = X86::SUB16rr;
		break;

		case X86::SETB_C32r:
		Sub = X86::SUB32rr;
		break;

		case X86::SETB_C64r:
		Sub = X86::SUB64rr;
		break;

		default:
		llvm_unreachable("Invalid SETB_C* opcode!");
		}
		unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
		BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
		.addReg(ZeroReg)
		.addReg(ExtCondReg);
		return RewriteToReg(ResultReg);
		}

void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,		void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
MachineBasicBlock::iterator TestPos,		MachineBasicBlock::iterator TestPos,
DebugLoc TestLoc,		DebugLoc TestLoc,
MachineInstr &SetCCI,		MachineInstr &SetCCI,
MachineOperand &FlagUse,		MachineOperand &FlagUse,
CondRegArray &CondRegs) {		CondRegArray &CondRegs) {
X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());		X86::CondCode Cond = X86::getCondFromSETOpc(SetCCI.getOpcode());
// Note that we can't usefully rewrite this to the inverse without complex		// Note that we can't usefully rewrite this to the inverse without complex
Show All 30 Lines

llvm/trunk/test/CodeGen/X86/copy-eflags.ll

Show First 20 Lines • Show All 298 Lines • ▼ Show 20 Lines	bb1:
%tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4		%tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
store volatile i8 %tmp8, i8* %ptr1		store volatile i8 %tmp8, i8* %ptr1
%tmp9 = load volatile i32, i32* %ptr2		%tmp9 = load volatile i32, i32* %ptr2
%tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9		%tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
%tmp11 = srem i32 0, %tmp10		%tmp11 = srem i32 0, %tmp10
%tmp12 = trunc i32 %tmp11 to i16		%tmp12 = trunc i32 %tmp11 to i16
br label %bb1		br label %bb1
}		}

		; Use a particular instruction pattern in order to lower to the post-RA pseudo
		; used to lower SETB into an SBB pattern in order to make sure that kind of
		; usage of a copied EFLAGS continues to work.
		define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
		; X32-LABEL: PR37431:
		; X32: # %bb.0: # %entry
		; X32-NEXT: pushl %esi
		; X32-NEXT: .cfi_def_cfa_offset 8
		; X32-NEXT: .cfi_offset %esi, -8
		; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X32-NEXT: movl (%eax), %eax
		; X32-NEXT: movl %eax, %ecx
		; X32-NEXT: sarl $31, %ecx
		; X32-NEXT: cmpl %eax, %eax
		; X32-NEXT: sbbl %ecx, %eax
		; X32-NEXT: setb %al
		; X32-NEXT: sbbb %cl, %cl
		; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
		; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
		; X32-NEXT: movb %cl, (%edx)
		; X32-NEXT: movzbl %al, %eax
		; X32-NEXT: xorl %ecx, %ecx
		; X32-NEXT: subl %eax, %ecx
		; X32-NEXT: xorl %eax, %eax
		; X32-NEXT: xorl %edx, %edx
		; X32-NEXT: idivl %ecx
		; X32-NEXT: movb %dl, (%esi)
		; X32-NEXT: popl %esi
		; X32-NEXT: .cfi_def_cfa_offset 4
		; X32-NEXT: retl
		;
		; X64-LABEL: PR37431:
		; X64: # %bb.0: # %entry
		; X64-NEXT: movq %rdx, %rcx
		; X64-NEXT: movslq (%rdi), %rax
		; X64-NEXT: cmpq %rax, %rax
		; X64-NEXT: sbbb %dl, %dl
		; X64-NEXT: cmpq %rax, %rax
		; X64-NEXT: movb %dl, (%rsi)
		; X64-NEXT: sbbl %esi, %esi
		; X64-NEXT: xorl %eax, %eax
		; X64-NEXT: xorl %edx, %edx
		; X64-NEXT: idivl %esi
		; X64-NEXT: movb %dl, (%rcx)
		; X64-NEXT: retq
		entry:
		%tmp = load i32, i32* %arg1
		%tmp1 = sext i32 %tmp to i64
		%tmp2 = icmp ugt i64 %tmp1, undef
		%tmp3 = zext i1 %tmp2 to i8
		%tmp4 = sub i8 0, %tmp3
		store i8 %tmp4, i8* %arg2
		%tmp5 = sext i8 %tmp4 to i32
		%tmp6 = srem i32 0, %tmp5
		%tmp7 = trunc i32 %tmp6 to i8
		store i8 %tmp7, i8* %arg3
		ret void
		}

llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir

Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	entry:
ret void		ret void
}		}

define void @test_rcr(i64 %a, i64 %b) {		define void @test_rcr(i64 %a, i64 %b) {
entry:		entry:
call void @foo()		call void @foo()
ret void		ret void
}		}

		define void @test_setb_c(i64 %a, i64 %b) {
		entry:
		call void @foo()
		ret void
		}
...		...
---		---
name: test_branch		name: test_branch
# CHECK-LABEL: name: test_branch		# CHECK-LABEL: name: test_branch
liveins:		liveins:
- { reg: '$rdi', virtual-reg: '%0' }		- { reg: '$rdi', virtual-reg: '%0' }
- { reg: '$rsi', virtual-reg: '%1' }		- { reg: '$rsi', virtual-reg: '%1' }
body: \|		body: \|
▲ Show 20 Lines • Show All 400 Lines • ▼ Show 20 Lines	body: \|
; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags		; CHECK: dead %{{[^:]*}}:gr8 = ADD8ri %[[CF_REG]], 255, implicit-def $eflags
; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def $eflags, implicit killed $eflags		; CHECK-NEXT: %4:gr64 = RCR64r1 %2, implicit-def $eflags, implicit killed $eflags
; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags		; CHECK-NEXT: %5:gr64 = RCR64r1 %4, implicit-def{{( dead)?}} $eflags, implicit{{( killed)?}} $eflags
MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5		MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %5

RET 0		RET 0

...		...
		---
		name: test_setb_c
		# CHECK-LABEL: name: test_setb_c
		liveins:
		- { reg: '$rdi', virtual-reg: '%0' }
		- { reg: '$rsi', virtual-reg: '%1' }
		body: \|
		bb.0:
		liveins: $rdi, $rsi

		%0:gr64 = COPY $rdi
		%1:gr64 = COPY $rsi
		%2:gr64 = ADD64rr %0, %1, implicit-def $eflags
		%3:gr64 = COPY $eflags
		; CHECK-NOT: COPY{{( killed)?}} $eflags
		; CHECK: %[[CF_REG:[^:]*]]:gr8 = SETBr implicit $eflags
		; CHECK-NOT: COPY{{( killed)?}} $eflags

		ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
		CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
		ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp

		$eflags = COPY %3
		%4:gr8 = SETB_C8r implicit-def $eflags, implicit $eflags
		MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %4
		; CHECK-NOT: $eflags =
		; CHECK: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
		; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
		; CHECK-NEXT: MOV8mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]

		$eflags = COPY %3
		%5:gr16 = SETB_C16r implicit-def $eflags, implicit $eflags
		MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %5
		; CHECK-NOT: $eflags =
		; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
		; CHECK-NEXT: %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
		; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
		; CHECK-NEXT: %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
		; CHECK-NEXT: MOV16mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]

		$eflags = COPY %3
		%6:gr32 = SETB_C32r implicit-def $eflags, implicit $eflags
		MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %6
		; CHECK-NOT: $eflags =
		; CHECK: %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
		; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
		; CHECK-NEXT: MOV32mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]

		$eflags = COPY %3
		%7:gr64 = SETB_C64r implicit-def $eflags, implicit $eflags
		MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %7
		; CHECK-NOT: $eflags =
		; CHECK: %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
		; CHECK-NEXT: %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
		; CHECK-NEXT: %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def $eflags
		; CHECK-NEXT: %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
		; CHECK-NEXT: %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
		; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -16, $noreg, killed %[[REPLACEMENT]]

		RET 0

		...

This is an archive of the discontinued LLVM Phabricator instance.

[x86][eflags] Fix PR37431 by teaching the EFLAGS copy lowering to specifically handle SETB_C* pseudo instructions.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 146904

llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp

llvm/trunk/test/CodeGen/X86/copy-eflags.ll

llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir

This is an archive of the discontinued LLVM Phabricator instance.

[x86][eflags] Fix PR37431 by teaching the EFLAGS copy lowering to specifically handle SETB_C* pseudo instructions.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 146904

llvm/trunk/lib/Target/X86/X86FlagsCopyLowering.cpp

llvm/trunk/test/CodeGen/X86/copy-eflags.ll

llvm/trunk/test/CodeGen/X86/flags-copy-lowering.mir

[x86][eflags] Fix PR37431 by teaching the EFLAGS copy lowering to specifically handle SETB_C* pseudo instructions.
ClosedPublic