Diff 15727

llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

Show First 20 Lines • Show All 2,406 Lines • ▼ Show 20 Lines	if (!SDValue(Node, 1).use_empty()) {
ReplaceUses(SDValue(Node, 1), ResHi);		ReplaceUses(SDValue(Node, 1), ResHi);
DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');		DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}		}

return nullptr;		return nullptr;
}		}

case ISD::SDIVREM:		case ISD::SDIVREM:
case ISD::UDIVREM: {		case ISD::UDIVREM:
		case X86ISD::SDIVREM8_SEXT_HREG:
		case X86ISD::UDIVREM8_ZEXT_HREG: {
SDValue N0 = Node->getOperand(0);		SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);		SDValue N1 = Node->getOperand(1);

bool isSigned = Opcode == ISD::SDIVREM;		bool isSigned = (Opcode == ISD::SDIVREM \|\|
		Opcode == X86ISD::SDIVREM8_SEXT_HREG);
if (!isSigned) {		if (!isSigned) {
switch (NVT.SimpleTy) {		switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");		default: llvm_unreachable("Unsupported VT!");
case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;		case MVT::i8: Opc = X86::DIV8r; MOpc = X86::DIV8m; break;
case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;		case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;		case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;		case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
}		}
▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines	if (foldedLoad) {
InFlag = SDValue(CNode, 1);		InFlag = SDValue(CNode, 1);
// Update the chain.		// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));		ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
} else {		} else {
InFlag =		InFlag =
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);		SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
}		}

// Prevent use of AH in a REX instruction by referencing AX instead.		// Prevent use of AH in a REX instruction by explicitly copying it to
// Shift it down 8 bits.		// an ABCD_L register.
//		//
// The current assumption of the register allocator is that isel		// The current assumption of the register allocator is that isel
// won't generate explicit references to the GPR8_NOREX registers. If		// won't generate explicit references to the GR8_ABCD_H registers. If
// the allocator and/or the backend get enhanced to be more robust in		// the allocator and/or the backend get enhanced to be more robust in
// that regard, this can be, and should be, removed.		// that regard, this can be, and should be, removed.
if (HiReg == X86::AH && Subtarget->is64Bit() &&		if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
!SDValue(Node, 1).use_empty()) {		SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,		unsigned AHExtOpcode =
X86::AX, MVT::i16, InFlag);		isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;
InFlag = Result.getValue(2);
		SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
// If we also need AL (the quotient), get it by extracting a subreg from		MVT::Glue, AHCopy, InFlag);
// Result. The fast register allocator does not like multiple CopyFromReg		SDValue Result(RNode, 0);
// nodes using aliasing registers.		InFlag = SDValue(RNode, 1);
if (!SDValue(Node, 0).use_empty())
ReplaceUses(SDValue(Node, 0),		if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG \|\|
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));		Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
		if (Node->getValueType(1) == MVT::i64) {
// Shift AX right by 8 bits instead of using AH.		// It's not possible to directly movsx AH to a 64bit register, because
Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,		// the latter needs the REX prefix, but the former can't have it.
Result,		assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
CurDAG->getTargetConstant(8, MVT::i8)),		"Unexpected i64 sext of h-register");
		Result =
		SDValue(CurDAG->getMachineNode(
		TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
		CurDAG->getTargetConstant(0, MVT::i64), Result,
		CurDAG->getTargetConstant(X86::sub_32bit, MVT::i32)),
0);		0);
ReplaceUses(SDValue(Node, 1),		}
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));		} else {
		Result =
		CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
		}
		ReplaceUses(SDValue(Node, 1), Result);
		DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}		}
// Copy the division (low) result, if it is needed.		// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {		if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,		SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
LoReg, NVT, InFlag);		LoReg, NVT, InFlag);
InFlag = Result.getValue(2);		InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);		ReplaceUses(SDValue(Node, 0), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');		DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
▲ Show 20 Lines • Show All 262 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86ISelLowering.h

Show First 20 Lines • Show All 298 Lines • ▼ Show 20 Lines	enum NodeType {

BEXTR, // BEXTR - Bit field extract		BEXTR, // BEXTR - Bit field extract

UMUL, // LOW, HI, FLAGS = umul LHS, RHS		UMUL, // LOW, HI, FLAGS = umul LHS, RHS

// 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS		// 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS
SMUL8, UMUL8,		SMUL8, UMUL8,

		// 8-bit divrem that zero- or sign-extends the high result (AH).
		UDIVREM8_ZEXT_HREG,
		SDIVREM8_SEXT_HREG,

// MUL_IMM - X86 specific multiply by immediate.		// MUL_IMM - X86 specific multiply by immediate.
MUL_IMM,		MUL_IMM,

// PTEST - Vector bitwise comparisons.		// PTEST - Vector bitwise comparisons.
PTEST,		PTEST,

// TESTP - Vector packed fp sign bitwise comparisons.		// TESTP - Vector packed fp sign bitwise comparisons.
TESTP,		TESTP,
▲ Show 20 Lines • Show All 719 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 19,074 Lines • ▼ Show 20 Lines	const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";		case X86ISD::PCMPEQM: return "X86ISD::PCMPEQM";
case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";		case X86ISD::PCMPGTM: return "X86ISD::PCMPGTM";
case X86ISD::ADD: return "X86ISD::ADD";		case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";		case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";		case X86ISD::ADC: return "X86ISD::ADC";
case X86ISD::SBB: return "X86ISD::SBB";		case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";		case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";		case X86ISD::UMUL: return "X86ISD::UMUL";
		case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG";
		case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG";
case X86ISD::INC: return "X86ISD::INC";		case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";		case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";		case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";		case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";		case X86ISD::AND: return "X86ISD::AND";
case X86ISD::BEXTR: return "X86ISD::BEXTR";		case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";		case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";		case X86ISD::PTEST: return "X86ISD::PTEST";
▲ Show 20 Lines • Show All 5,182 Lines • ▼ Show 20 Lines	if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND \|\|
}		}
}		}
return SDValue();		return SDValue();
}		}

static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,		static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,		TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {		const X86Subtarget *Subtarget) {
		SDValue N0 = N->getOperand(0);
		EVT VT = N->getValueType(0);

		// (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
		// (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
		// This exposes the sext to the sdivrem lowering, so that it directly extends
		// from AH (which we otherwise need to do contortions to access).
		if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
		N0.getValueType() == MVT::i8 && VT == MVT::i32) {
		SDLoc dl(N);
		SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
		SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, dl, NodeTys,
		N0.getOperand(0), N0.getOperand(1));
		DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
		return R.getValue(1);
		}

if (!DCI.isBeforeLegalizeOps())		if (!DCI.isBeforeLegalizeOps())
return SDValue();		return SDValue();

if (!Subtarget->hasFp256())		if (!Subtarget->hasFp256())
return SDValue();		return SDValue();

EVT VT = N->getValueType(0);
if (VT.isVector() && VT.getSizeInBits() == 256) {		if (VT.isVector() && VT.getSizeInBits() == 256) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);		SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())		if (R.getNode())
return R;		return R;
}		}

return SDValue();		return SDValue();
}		}
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines	if (N0.getOpcode() == ISD::TRUNCATE &&
}		}
}		}
if (VT.is256BitVector()) {		if (VT.is256BitVector()) {
SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);		SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget);
if (R.getNode())		if (R.getNode())
return R;		return R;
}		}

		// (i8,i32 zext (udivrem (i8 x, i8 y)) ->
		// (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
		// This exposes the zext to the udivrem lowering, so that it directly extends
		// from AH (which we otherwise need to do contortions to access).
		if (N0.getOpcode() == ISD::UDIVREM &&
		N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
		(VT == MVT::i32 \|\| VT == MVT::i64)) {
		SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
		SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
		N0.getOperand(0), N0.getOperand(1));
		DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
		return R.getValue(1);
		}

return SDValue();		return SDValue();
}		}

// Optimize x == -y --> x+y == 0		// Optimize x == -y --> x+y == 0
// x != -y --> x+y != 0		// x != -y --> x+y != 0
static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,		static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget* Subtarget) {		const X86Subtarget* Subtarget) {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();		ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
▲ Show 20 Lines • Show All 1,276 Lines • Show Last 20 Lines

llvm/trunk/lib/Target/X86/X86InstrExtension.td

Show First 20 Lines • Show All 91 Lines • ▼ Show 20 Lines	def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
TB, OpSize32, Sched<[WriteALULd]>;		TB, OpSize32, Sched<[WriteALULd]>;

// These are the same as the regular MOVZX32rr8 and MOVZX32rm8		// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class		// except that they use GR32_NOREX for the output operand register class
// instead of GR32. This allows them to operate on h registers on x86-64.		// instead of GR32. This allows them to operate on h registers on x86-64.
let neverHasSideEffects = 1, isCodeGenOnly = 1 in {		let neverHasSideEffects = 1, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,		def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),		(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl\|x}\t{$src, $dst\|$dst, $src}",		"movz{bl\|x}\t{$src, $dst\|$dst, $src} # NOREX",
[], IIC_MOVZX>, TB, Sched<[WriteALU]>;		[], IIC_MOVZX>, TB, Sched<[WriteALU]>;
let mayLoad = 1 in		let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,		def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),		(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl\|x}\t{$src, $dst\|$dst, $src}",		"movz{bl\|x}\t{$src, $dst\|$dst, $src} # NOREX",
[], IIC_MOVZX>, TB, Sched<[WriteALULd]>;		[], IIC_MOVZX>, TB, Sched<[WriteALULd]>;

		def MOVSX32_NOREXrr8 : I<0xBE, MRMSrcReg,
		(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
		"movs{bl\|x}\t{$src, $dst\|$dst, $src} # NOREX",
		[], IIC_MOVSX>, TB, Sched<[WriteALU]>;
		let mayLoad = 1 in
		def MOVSX32_NOREXrm8 : I<0xBE, MRMSrcMem,
		(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
		"movs{bl\|x}\t{$src, $dst\|$dst, $src} # NOREX",
		[], IIC_MOVSX>, TB, Sched<[WriteALULd]>;
}		}

// MOVSX64rr8 always has a REX prefix and it has an 8-bit register		// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register		// operand, which makes it a rare instruction with an 8-bit register
// operand that can never access an h register. If support for h registers		// operand that can never access an h register. If support for h registers
// were generalized, this would require a special register class.		// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),		def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq\|x}\t{$src, $dst\|$dst, $src}",		"movs{bq\|x}\t{$src, $dst\|$dst, $src}",
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/divrem8_ext.ll

				; RUN: llc -march=x86-64 < %s \| FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-64
				; RUN: llc -march=x86 < %s \| FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-32
				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.10.0"

				define zeroext i8 @test_udivrem_zext_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_udivrem_zext_ah
				; CHECK: divb
				; CHECK: movzbl %ah, [[REG_REM:%[a-z0-9]+]]
				; CHECK: movb %al, ([[REG_ZPTR:%[a-z0-9]+]])
				; CHECK: movl [[REG_REM]], %eax
				; CHECK: ret
				%div = udiv i8 %x, %y
				store i8 %div, i8* @z
				%1 = urem i8 %x, %y
				ret i8 %1
				}

				define zeroext i8 @test_urem_zext_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_urem_zext_ah
				; CHECK: divb
				; CHECK: movzbl %ah, %eax
				; CHECK: ret
				%1 = urem i8 %x, %y
				ret i8 %1
				}

				define i8 @test_urem_noext_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_urem_noext_ah
				; CHECK: divb [[REG_X:%[a-z0-9]+]]
				; CHECK: movzbl %ah, %eax
				; CHECK: addb [[REG_X]], %al
				; CHECK: ret
				%1 = urem i8 %x, %y
				%2 = add i8 %1, %y
				ret i8 %2
				}

				define i64 @test_urem_zext64_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_urem_zext64_ah
				; CHECK: divb
				; CHECK: movzbl %ah, %eax
				; CHECK-32: xorl %edx, %edx
				; CHECK: ret
				%1 = urem i8 %x, %y
				%2 = zext i8 %1 to i64
				ret i64 %2
				}

				; Signed 8-bit sdiv + srem of the same operands: the quotient (AL) is stored
				; to @z and the remainder is returned sign-extended, so the remainder is
				; expected to be sign-extended directly out of AH with movsbl.
				define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_sdivrem_sext_ah
				; CHECK: cbtw
				; CHECK: idivb
				; CHECK: movsbl %ah, [[REG_REM:%[a-z0-9]+]]
				; REG_ZPTR is bound here rather than reused from an earlier function, so this
				; function does not depend on another function's register assignment.
				; CHECK: movb %al, ([[REG_ZPTR:%[a-z0-9]+]])
				; CHECK: movl [[REG_REM]], %eax
				; CHECK: ret
				%div = sdiv i8 %x, %y
				store i8 %div, i8* @z
				%1 = srem i8 %x, %y
				ret i8 %1
				}

				define signext i8 @test_srem_sext_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_srem_sext_ah
				; CHECK: cbtw
				; CHECK: idivb
				; CHECK: movsbl %ah, %eax
				; CHECK: ret
				%1 = srem i8 %x, %y
				ret i8 %1
				}

				define i8 @test_srem_noext_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_srem_noext_ah
				; CHECK: cbtw
				; CHECK: idivb [[REG_X:%[a-z0-9]+]]
				; CHECK: movsbl %ah, %eax
				; CHECK: addb [[REG_X]], %al
				; CHECK: ret
				%1 = srem i8 %x, %y
				%2 = add i8 %1, %y
				ret i8 %2
				}

				define i64 @test_srem_sext64_ah(i8 %x, i8 %y) {
				; CHECK-LABEL: test_srem_sext64_ah
				; CHECK: cbtw
				; CHECK: idivb
				; CHECK: movsbl %ah, %eax
				; CHECK-32: movl %eax, %edx
				; CHECK-32: sarl $31, %edx
				; CHECK-64: movsbq %al, %rax
				; CHECK: ret
				%1 = srem i8 %x, %y
				%2 = sext i8 %1 to i64
				ret i64 %2
				}

				@z = external global i8

This is an archive of the discontinued LLVM Phabricator instance.

[X86] 8bit divrem: Improve codegen for AH register extraction.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 15727

llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/trunk/lib/Target/X86/X86ISelLowering.h

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/lib/Target/X86/X86InstrExtension.td

llvm/trunk/test/CodeGen/X86/divrem8_ext.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] 8bit divrem: Improve codegen for AH register extraction. ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 15727

llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp

llvm/trunk/lib/Target/X86/X86ISelLowering.h

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

llvm/trunk/lib/Target/X86/X86InstrExtension.td

llvm/trunk/test/CodeGen/X86/divrem8_ext.ll

[X86] 8bit divrem: Improve codegen for AH register extraction.
ClosedPublic