Diff 132147

lib/Target/X86/X86ISelDAGToDAG.cpp

Show First 20 Lines • Show All 3,067 Lines • ▼ Show 20 Lines	if ((N0.getOpcode() == ISD::AND \|\|
// Replace SUB\|CMP with TEST, since SUB has two outputs while TEST has		// Replace SUB\|CMP with TEST, since SUB has two outputs while TEST has
// one, do not call ReplaceAllUsesWith.		// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),		ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));		SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);		CurDAG->RemoveDeadNode(Node);
return;		return;
}		}

// For example, "testl %eax, $2048" to "testb %ah, $8".
if (isShiftedUInt<8, 8>(Mask) &&
(!(Mask & 0x8000) \|\| hasNoSignedComparisonUses(Node))) {
// Shift the immediate right by 8 bits.
SDValue ShiftedImm = CurDAG->getTargetConstant(Mask >> 8, dl, MVT::i8);
SDValue Reg = N0.getOperand(0);

// Extract the h-register.
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);

// Emit a testb. The EXTRACT_SUBREG becomes a COPY that can only
// target GR8_NOREX registers, so make sure the register class is
// forced.
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
[Inline review comment] craig.topper: I believe this was the only use of the TEST8ri_NOREX instruction. Can you remove it from the .td file too?
MVT::i32, Subreg, ShiftedImm);
// Replace SUB\|CMP with TEST, since SUB has two outputs while TEST has
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);
return;
}

// For example, "testl %eax, $32776" to "testw %ax, $32776".		// For example, "testl %eax, $32776" to "testw %ax, $32776".
// NOTE: We only want to form TESTW instructions if optimizing for		// NOTE: We only want to form TESTW instructions if optimizing for
// min size. Otherwise we only save one byte and possibly get a length		// min size. Otherwise we only save one byte and possibly get a length
// changing prefix penalty in the decoders.		// changing prefix penalty in the decoders.
if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 &&		if (OptForMinSize && isUInt<16>(Mask) && N0.getValueType() != MVT::i16 &&
(!(Mask & 0x8000) \|\| hasNoSignedComparisonUses(Node))) {		(!(Mask & 0x8000) \|\| hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16);		SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i16);
SDValue Reg = N0.getOperand(0);		SDValue Reg = N0.getOperand(0);
Show All 9 Lines	if ((N0.getOpcode() == ISD::AND \|\|
// one, do not call ReplaceAllUsesWith.		// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),		ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));		SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);		CurDAG->RemoveDeadNode(Node);
return;		return;
}		}

// For example, "testq %rax, $268468232" to "testl %eax, $268468232".		// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 &&		if (isUInt<32>(Mask) && N0.getValueType() == MVT::i64 &&
		[Inline review comment] niravd: We're now generating a testl, but we've already passed the logic that reduces to the smaller testw. Can you fold that logic into this and add a test case (e.g. replicate testOperand32 with optsize set)?
(!(Mask & 0x80000000) \|\| hasNoSignedComparisonUses(Node))) {		(!(Mask & 0x80000000) \|\| hasNoSignedComparisonUses(Node))) {
SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32);		SDValue Imm = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
SDValue Reg = N0.getOperand(0);		SDValue Reg = N0.getOperand(0);

// Extract the 32-bit subregister.		// Extract the 32-bit subregister.
SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,		SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
MVT::i32, Reg);		MVT::i32, Reg);

// Emit a testl.		// Emit a testl.
SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,		SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
Subreg, Imm);		Subreg, Imm);
// Replace SUB\|CMP with TEST, since SUB has two outputs while TEST has		// Replace SUB\|CMP with TEST, since SUB has two outputs while TEST has
// one, do not call ReplaceAllUsesWith.		// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),		ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));		SDValue(NewNode, 0));
CurDAG->RemoveDeadNode(Node);		CurDAG->RemoveDeadNode(Node);
▲ Show 20 Lines • Show All 48 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrArithmetic.td

Show First 20 Lines • Show All 1,251 Lines • ▼ Show 20 Lines	let Defs = [EFLAGS] in {
let Predicates = [In64BitMode] in		let Predicates = [In64BitMode] in
def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;		def TEST64ri32 : BinOpRI_F<0xF6, "test", Xi64, X86testpat, MRM0r>;

def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>;		def TEST8mi : BinOpMI_F<0xF6, "test", Xi8 , X86testpat, MRM0m>;
def TEST16mi : BinOpMI_F<0xF6, "test", Xi16, X86testpat, MRM0m>;		def TEST16mi : BinOpMI_F<0xF6, "test", Xi16, X86testpat, MRM0m>;
def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>;		def TEST32mi : BinOpMI_F<0xF6, "test", Xi32, X86testpat, MRM0m>;
let Predicates = [In64BitMode] in		let Predicates = [In64BitMode] in
def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>;		def TEST64mi32 : BinOpMI_F<0xF6, "test", Xi64, X86testpat, MRM0m>;

// When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
// register class is constrained to GR8_NOREX. This pseudo is explicitly
// marked side-effect free, since it doesn't have an isel pattern like
// other test instructions.
let isPseudo = 1, hasSideEffects = 0 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
} // Defs = [EFLAGS]		} // Defs = [EFLAGS]

def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL,		def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL,
"{$src, %al\|al, $src}">;		"{$src, %al\|al, $src}">;
def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX,		def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX,
"{$src, %ax\|ax, $src}">;		"{$src, %ax\|ax, $src}">;
def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX,		def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX,
"{$src, %eax\|eax, $src}">;		"{$src, %eax\|eax, $src}">;
▲ Show 20 Lines • Show All 114 Lines • Show Last 20 Lines

lib/Target/X86/X86InstrInfo.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 8,012 Lines • ▼ Show 20 Lines	case X86::VMOVUPSZ128mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),		return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);		get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
case X86::VMOVAPSZ256mr_NOVLX:		case X86::VMOVAPSZ256mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),		return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);		get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::VMOVUPSZ256mr_NOVLX:		case X86::VMOVUPSZ256mr_NOVLX:
return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),		return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);		get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::TEST8ri_NOREX:
MI.setDesc(get(X86::TEST8ri));
return true;
case X86::MOV32ri64:		case X86::MOV32ri64:
MI.setDesc(get(X86::MOV32ri));		MI.setDesc(get(X86::MOV32ri));
return true;		return true;

// KNL does not recognize dependency-breaking idioms for mask registers,		// KNL does not recognize dependency-breaking idioms for mask registers,
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.		// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
// Using %k0 as the undef input register is a performance heuristic based		// Using %k0 as the undef input register is a performance heuristic based
// on the assumption that %k0 is used less frequently than the other mask		// on the assumption that %k0 is used less frequently than the other mask
▲ Show 20 Lines • Show All 3,228 Lines • Show Last 20 Lines

lib/Target/X86/X86MacroFusion.cpp

Show First 20 Lines • Show All 80 Lines • ▼ Show 20 Lines	static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
case X86::TEST32ri:		case X86::TEST32ri:
case X86::TEST32i32:		case X86::TEST32i32:
case X86::TEST64i32:		case X86::TEST64i32:
case X86::TEST64ri32:		case X86::TEST64ri32:
case X86::TEST8mr:		case X86::TEST8mr:
case X86::TEST16mr:		case X86::TEST16mr:
case X86::TEST32mr:		case X86::TEST32mr:
case X86::TEST64mr:		case X86::TEST64mr:
case X86::TEST8ri_NOREX:
case X86::AND16i16:		case X86::AND16i16:
case X86::AND16ri:		case X86::AND16ri:
case X86::AND16ri8:		case X86::AND16ri8:
case X86::AND16rm:		case X86::AND16rm:
case X86::AND16rr:		case X86::AND16rr:
case X86::AND32i32:		case X86::AND32i32:
case X86::AND32ri:		case X86::AND32ri:
case X86::AND32ri8:		case X86::AND32ri8:
▲ Show 20 Lines • Show All 103 Lines • Show Last 20 Lines

test/CodeGen/X86/testb-je-fusion.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-- -mcpu=corei7-avx \| FileCheck %s			; RUN: llc < %s -mtriple=x86_64-- -mcpu=corei7-avx \| FileCheck %s

	; testb should be scheduled right before je to enable macro-fusion.			; testb should be scheduled right before je to enable macro-fusion.

	define i32 @check_flag(i32 %flags, ...) nounwind {			define i32 @check_flag(i32 %flags, ...) nounwind {
	; CHECK-LABEL: check_flag:			; CHECK-LABEL: check_flag:
	; CHECK: # %bb.0: # %entry			; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: movl %edi, %ecx
	; CHECK-NEXT: xorl %eax, %eax			; CHECK-NEXT: xorl %eax, %eax
	; CHECK-NEXT: testb $2, %ch			; CHECK-NEXT: testl $512, %edi # imm = 0x200
	; CHECK-NEXT: je .LBB0_2			; CHECK-NEXT: je .LBB0_2
	; CHECK-NEXT: # %bb.1: # %if.then			; CHECK-NEXT: # %bb.1: # %if.then
	; CHECK-NEXT: movl $1, %eax			; CHECK-NEXT: movl $1, %eax
	; CHECK-NEXT: .LBB0_2: # %if.end			; CHECK-NEXT: .LBB0_2: # %if.end
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%and = and i32 %flags, 512			%and = and i32 %flags, 512
	%tobool = icmp eq i32 %and, 0			%tobool = icmp eq i32 %and, 0
	Show All 9 Lines

test/CodeGen/X86/vastart-defs-eflags.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc %s -o - \| FileCheck %s			; RUN: llc %s -o - \| FileCheck %s

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
	target triple = "x86_64-apple-macosx10.10.0"			target triple = "x86_64-apple-macosx10.10.0"

	; Check that vastart handling doesn't get between testb and je for the branch.			; Check that vastart handling doesn't get between testb and je for the branch.
	define i32 @check_flag(i32 %flags, ...) nounwind {			define i32 @check_flag(i32 %flags, ...) nounwind {
	; CHECK-LABEL: check_flag:			; CHECK-LABEL: check_flag:
	; CHECK: ## %bb.0: ## %entry			; CHECK: ## %bb.0: ## %entry
	; CHECK-NEXT: pushq %rbx			; CHECK-NEXT: subq $56, %rsp
	; CHECK-NEXT: subq $48, %rsp
	; CHECK-NEXT: movl %edi, %ebx
	; CHECK-NEXT: testb %al, %al			; CHECK-NEXT: testb %al, %al
	; CHECK-NEXT: je LBB0_2			; CHECK-NEXT: je LBB0_2
	; CHECK-NEXT: ## %bb.1: ## %entry			; CHECK-NEXT: ## %bb.1: ## %entry
	; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movaps %xmm5, (%rsp)			; CHECK-NEXT: movaps %xmm5, (%rsp)
	; CHECK-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)			; CHECK-NEXT: movaps %xmm6, {{[0-9]+}}(%rsp)
	; CHECK-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)			; CHECK-NEXT: movaps %xmm7, {{[0-9]+}}(%rsp)
	; CHECK-NEXT: LBB0_2: ## %entry			; CHECK-NEXT: LBB0_2: ## %entry
	; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)			; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
	; CHECK-NEXT: xorl %eax, %eax			; CHECK-NEXT: xorl %eax, %eax
	; CHECK-NEXT: testb $2, %bh			; CHECK-NEXT: testl $512, %edi ## imm = 0x200
	; CHECK-NEXT: je LBB0_4			; CHECK-NEXT: je LBB0_4
	; CHECK-NEXT: ## %bb.3: ## %if.then			; CHECK-NEXT: ## %bb.3: ## %if.then
	; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax			; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
	; CHECK-NEXT: movq %rax, 16			; CHECK-NEXT: movq %rax, 16
	; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax			; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
	; CHECK-NEXT: movq %rax, 8			; CHECK-NEXT: movq %rax, 8
	; CHECK-NEXT: movl $48, 4			; CHECK-NEXT: movl $48, 4
	; CHECK-NEXT: movl $8, 0			; CHECK-NEXT: movl $8, 0
	; CHECK-NEXT: movl $1, %eax			; CHECK-NEXT: movl $1, %eax
	; CHECK-NEXT: LBB0_4: ## %if.end			; CHECK-NEXT: LBB0_4: ## %if.end
	; CHECK-NEXT: addq $48, %rsp			; CHECK-NEXT: addq $56, %rsp
	; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	entry:			entry:
	%and = and i32 %flags, 512			%and = and i32 %flags, 512
	%tobool = icmp eq i32 %and, 0			%tobool = icmp eq i32 %and, 0
	br i1 %tobool, label %if.end, label %if.then			br i1 %tobool, label %if.end, label %if.then

	if.then: ; preds = %entry			if.then: ; preds = %entry
	call void @llvm.va_start(i8* null)			call void @llvm.va_start(i8* null)
	br label %if.end			br label %if.end

	if.end: ; preds = %entry, %if.then			if.end: ; preds = %entry, %if.then
	%hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ]			%hasflag = phi i32 [ 1, %if.then ], [ 0, %entry ]
	ret i32 %hasflag			ret i32 %hasflag
	}			}

	declare void @llvm.va_start(i8*) nounwind			declare void @llvm.va_start(i8*) nounwind

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Avoid using high register trick for test instruction
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 132147

lib/Target/X86/X86ISelDAGToDAG.cpp

lib/Target/X86/X86InstrArithmetic.td

lib/Target/X86/X86InstrInfo.cpp

lib/Target/X86/X86MacroFusion.cpp

test/CodeGen/X86/testb-je-fusion.ll

test/CodeGen/X86/vastart-defs-eflags.ll

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Avoid using high register trick for test instruction
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 132147

lib/Target/X86/X86ISelDAGToDAG.cpp

lib/Target/X86/X86InstrArithmetic.td

lib/Target/X86/X86InstrInfo.cpp

lib/Target/X86/X86MacroFusion.cpp

test/CodeGen/X86/testb-je-fusion.ll

test/CodeGen/X86/vastart-defs-eflags.ll

[X86] Avoid using high register trick for test instruction
ClosedPublic