This is an archive of the discontinued LLVM Phabricator instance.

[X86]: Quit promoting 8 and 16 bit compares to 32 bit.
ClosedPublic

Authored by kbsmith1 on Jun 8 2016, 10:48 AM.

Download Raw Diff

Details

Reviewers

grosbach
echristo
sanjoy
DavidKreitzer

Commits

rL272801: [X86]: Quit promoting 8 and 16 bit compares to 32 bit.

Summary

This change effectively just reverts r195496 and updates the tests as needed.
8 and 16 bit compares are no longer promoted up into 32 bit compares. This has some
nice performance improvements, especially in eembc/rgbcmykv2. To keep this from
causing performance regressions in 401.bzip2, the changes in http://reviews.llvm.org/D21085
are also necessary, so that all the necessary movb and movw instructions get promoted to
movzbl/movzwl.

Diff Detail

Repository: rL LLVM

Event Timeline

kbsmith1 updated this revision to Diff 60068. Jun 8 2016, 10:48 AM

kbsmith1 retitled this revision to "[X86]: Quit promoting 8 and 16 bit compares to 32 bit."

kbsmith1 updated this object.

kbsmith1 added reviewers: echristo, DavidKreitzer, sanjoy.

kbsmith1 added a subscriber: llvm-commits.

Herald added a subscriber: mehdi_amini. · View Herald Transcript · Jun 8 2016, 10:48 AM

We talked about this on the list, but I'd still like to get an explicit ack from Jim.

-eric

echristo accepted this revision. Jun 8 2016, 1:27 PM

echristo edited edge metadata.

This revision is now accepted and ready to land. Jun 8 2016, 1:27 PM

eli.friedman added a subscriber: eli.friedman. Jun 8 2016, 2:20 PM

eli.friedman added inline comments.

test/CodeGen/X86/memcmp.ll
44 ↗	(On Diff #60068)	16-bit immediate operands are bad for performance on modern x86.

kbsmith1 added inline comments. Jun 8 2016, 3:48 PM

test/CodeGen/X86/memcmp.ll
44 ↗	(On Diff #60068)	I'm looking into changing the code so that 16 bit compares which have a constant operand will continue to get promoted, and how that affects the performance numbers.

Updated changes so this will continue to promote 16 bit compares to 32 bits if one of the compare
operands is a constant. This addresses Eli Friedman's comment.

Closed by commit rL272801: [X86]: Quit promoting 8 and 16 bit compares to 32 bit. (authored by kbsmith1). · Explain Why · Jun 15 2016, 9:44 AM

This revision was automatically updated to reflect the committed changes.

In D21144#453975, @kbsmith1 wrote:

Updated changes so this will continue to promote 16 bit compares to 32 bits if one of the compare
operands is a constant. This addresses Eli Friedman's comment.

Yes, a 16-bit immediate constant may introduce an LCP (length-changing prefix) stall that may end up hurting performance. But on newer architectures (Sandy Bridge and later), this problem is much less severe, especially when the loop body fits in the LSD (loop stream detector). I don't think it's a good idea to blindly convert all comparisons against 16-bit immediate constants to 32-bit.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Target/

X86/

X86ISelLowering.cpp

32 lines

test/

CodeGen/

X86/

8 lines

56 lines

48 lines

2 lines

machine-sink-and-implicit-null-checks.ll

2 lines

memcmp.ll

6 lines

pr5145.ll

16 lines

x86-shrink-wrapping.ll

3 lines

Diff 60857

llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 3,921 Lines • ▼ Show 20 Lines	bool X86::isCalleePop(CallingConv::ID CallingConv,
case CallingConv::X86_StdCall:		case CallingConv::X86_StdCall:
case CallingConv::X86_FastCall:		case CallingConv::X86_FastCall:
case CallingConv::X86_ThisCall:		case CallingConv::X86_ThisCall:
case CallingConv::X86_VectorCall:		case CallingConv::X86_VectorCall:
return !is64Bit;		return !is64Bit;
}		}
}		}

/// \brief Return true if the condition is an unsigned comparison operation.
static bool isX86CCUnsigned(unsigned X86CC) {
switch (X86CC) {
default:
llvm_unreachable("Invalid integer condition!");
case X86::COND_E:
case X86::COND_NE:
case X86::COND_B:
case X86::COND_A:
case X86::COND_BE:
case X86::COND_AE:
return true;
case X86::COND_G:
case X86::COND_GE:
case X86::COND_L:
case X86::COND_LE:
return false;
}
}

static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {		static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
switch (SetCCOpcode) {		switch (SetCCOpcode) {
default: llvm_unreachable("Invalid integer condition!");		default: llvm_unreachable("Invalid integer condition!");
case ISD::SETEQ: return X86::COND_E;		case ISD::SETEQ: return X86::COND_E;
case ISD::SETGT: return X86::COND_G;		case ISD::SETGT: return X86::COND_G;
case ISD::SETGE: return X86::COND_GE;		case ISD::SETGE: return X86::COND_GE;
case ISD::SETLT: return X86::COND_L;		case ISD::SETLT: return X86::COND_L;
case ISD::SETLE: return X86::COND_LE;		case ISD::SETLE: return X86::COND_LE;
▲ Show 20 Lines • Show All 10,803 Lines • ▼ Show 20 Lines	SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
if (isNullConstant(Op1))		if (isNullConstant(Op1))
return EmitTest(Op0, X86CC, dl, DAG);		return EmitTest(Op0, X86CC, dl, DAG);

assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&		assert(!(isa<ConstantSDNode>(Op1) && Op0.getValueType() == MVT::i1) &&
"Unexpected comparison operation for MVT::i1 operands");		"Unexpected comparison operation for MVT::i1 operands");

if ((Op0.getValueType() == MVT::i8 \|\| Op0.getValueType() == MVT::i16 \|\|		if ((Op0.getValueType() == MVT::i8 \|\| Op0.getValueType() == MVT::i16 \|\|
Op0.getValueType() == MVT::i32 \|\| Op0.getValueType() == MVT::i64)) {		Op0.getValueType() == MVT::i32 \|\| Op0.getValueType() == MVT::i64)) {
// Do the comparison at i32 if it's smaller, besides the Atom case.
// This avoids subregister aliasing issues. Keep the smaller reference
// if we're optimizing for size, however, as that'll allow better folding
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
!DAG.getMachineFunction().getFunction()->optForMinSize() &&
!Subtarget.isAtom()) {
unsigned ExtendOp =
isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
Op0 = DAG.getNode(ExtendOp, dl, MVT::i32, Op0);
Op1 = DAG.getNode(ExtendOp, dl, MVT::i32, Op1);
}
// Use SUB instead of CMP to enable CSE between SUB and CMP.		// Use SUB instead of CMP to enable CSE between SUB and CMP.
SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);		SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,		SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs,
Op0, Op1);		Op0, Op1);
return SDValue(Sub.getNode(), 1);		return SDValue(Sub.getNode(), 1);
}		}
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);		return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}		}
▲ Show 20 Lines • Show All 16,652 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/3addr-16bit.ll

	; RUN: llc < %s -mtriple=x86_64-apple-darwin -asm-verbose=false \| FileCheck %s -check-prefix=64BIT			; RUN: llc < %s -mtriple=x86_64-apple-darwin -asm-verbose=false \| FileCheck %s -check-prefix=64BIT
	; rdar://7329206			; rdar://7329206

	; In 32-bit the partial register stall would degrade performance.			; In 32-bit the partial register stall would degrade performance.

	define zeroext i16 @t1(i16 zeroext %c, i16 zeroext %k) nounwind ssp {			define zeroext i16 @t1(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
	entry:			entry:
	; 32BIT-LABEL: t1:			; 32BIT-LABEL: t1:
	; 32BIT: movw 20(%esp), %ax			; 32BIT: movw 20(%esp), %ax
	; 32BIT-NOT: movw %ax, %cx			; 32BIT-NOT: movw %ax, %cx
	; 32BIT: leal 1(%eax), %ecx			; 32BIT: leal 1(%eax), %ecx

	; 64BIT-LABEL: t1:			; 64BIT-LABEL: t1:
	; 64BIT-NOT: movw %si, %ax			; 64BIT-NOT: movw %si, %ax
	; 64BIT: leal 1(%rsi), %eax			; 64BIT: movl %esi, %eax
	%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]			%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
	%1 = add i16 %k, 1 ; <i16> [#uses=3]			%1 = add i16 %k, 1 ; <i16> [#uses=3]
	br i1 %0, label %bb, label %bb1			br i1 %0, label %bb, label %bb1

	bb: ; preds = %entry			bb: ; preds = %entry
	tail call void @foo(i16 zeroext %1) nounwind			tail call void @foo(i16 zeroext %1) nounwind
	ret i16 %1			ret i16 %1

	bb1: ; preds = %entry			bb1: ; preds = %entry
	ret i16 %1			ret i16 %1
	}			}

	define zeroext i16 @t2(i16 zeroext %c, i16 zeroext %k) nounwind ssp {			define zeroext i16 @t2(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
	entry:			entry:
	; 32BIT-LABEL: t2:			; 32BIT-LABEL: t2:
	; 32BIT: movw 20(%esp), %ax			; 32BIT: movw 20(%esp), %ax
	; 32BIT-NOT: movw %ax, %cx			; 32BIT-NOT: movw %ax, %cx
	; 32BIT: leal -1(%eax), %ecx			; 32BIT: leal -1(%eax), %ecx

	; 64BIT-LABEL: t2:			; 64BIT-LABEL: t2:
	; 64BIT-NOT: movw %si, %ax			; 64BIT-NOT: movw %si, %ax
	; 64BIT: leal -1(%rsi), %eax			; 64BIT: movl %esi, %eax
	; 64BIT: movzwl %ax			; 64BIT: movzwl %ax
	%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]			%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
	%1 = add i16 %k, -1 ; <i16> [#uses=3]			%1 = add i16 %k, -1 ; <i16> [#uses=3]
	br i1 %0, label %bb, label %bb1			br i1 %0, label %bb, label %bb1

	bb: ; preds = %entry			bb: ; preds = %entry
	tail call void @foo(i16 zeroext %1) nounwind			tail call void @foo(i16 zeroext %1) nounwind
	ret i16 %1			ret i16 %1

	bb1: ; preds = %entry			bb1: ; preds = %entry
	ret i16 %1			ret i16 %1
	}			}

	declare void @foo(i16 zeroext)			declare void @foo(i16 zeroext)

	define zeroext i16 @t3(i16 zeroext %c, i16 zeroext %k) nounwind ssp {			define zeroext i16 @t3(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
	entry:			entry:
	; 32BIT-LABEL: t3:			; 32BIT-LABEL: t3:
	; 32BIT: movw 20(%esp), %ax			; 32BIT: movw 20(%esp), %ax
	; 32BIT-NOT: movw %ax, %cx			; 32BIT-NOT: movw %ax, %cx
	; 32BIT: leal 2(%eax), %ecx			; 32BIT: leal 2(%eax), %ecx

	; 64BIT-LABEL: t3:			; 64BIT-LABEL: t3:
	; 64BIT-NOT: movw %si, %ax			; 64BIT-NOT: movw %si, %ax
	; 64BIT: leal 2(%rsi), %eax			; 64BIT: movl %esi, %eax
	%0 = add i16 %k, 2 ; <i16> [#uses=3]			%0 = add i16 %k, 2 ; <i16> [#uses=3]
	%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]			%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
	br i1 %1, label %bb, label %bb1			br i1 %1, label %bb, label %bb1

	bb: ; preds = %entry			bb: ; preds = %entry
	tail call void @foo(i16 zeroext %0) nounwind			tail call void @foo(i16 zeroext %0) nounwind
	ret i16 %0			ret i16 %0

	bb1: ; preds = %entry			bb1: ; preds = %entry
	ret i16 %0			ret i16 %0
	}			}

	define zeroext i16 @t4(i16 zeroext %c, i16 zeroext %k) nounwind ssp {			define zeroext i16 @t4(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
	entry:			entry:
	; 32BIT-LABEL: t4:			; 32BIT-LABEL: t4:
	; 32BIT: movw 16(%esp), %ax			; 32BIT: movw 16(%esp), %ax
	; 32BIT: movw 20(%esp), %cx			; 32BIT: movw 20(%esp), %cx
	; 32BIT-NOT: movw %cx, %dx			; 32BIT-NOT: movw %cx, %dx
	; 32BIT: leal (%ecx,%eax), %edx			; 32BIT: leal (%ecx,%eax), %edx

	; 64BIT-LABEL: t4:			; 64BIT-LABEL: t4:
	; 64BIT-NOT: movw %si, %ax			; 64BIT-NOT: movw %si, %ax
	; 64BIT: leal (%rsi,%rdi), %eax			; 64BIT: movl %esi, %eax
	%0 = add i16 %k, %c ; <i16> [#uses=3]			%0 = add i16 %k, %c ; <i16> [#uses=3]
	%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]			%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
	br i1 %1, label %bb, label %bb1			br i1 %1, label %bb, label %bb1

	bb: ; preds = %entry			bb: ; preds = %entry
	tail call void @foo(i16 zeroext %0) nounwind			tail call void @foo(i16 zeroext %0) nounwind
	ret i16 %0			ret i16 %0

	bb1: ; preds = %entry			bb1: ; preds = %entry
	ret i16 %0			ret i16 %0
	}			}

llvm/trunk/test/CodeGen/X86/atomic16.ll

	Show First 20 Lines • Show All 148 Lines • ▼ Show 20 Lines
	; X32: lock			; X32: lock
	; X32: cmpxchgw			; X32: cmpxchgw
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_max16(i16 %x) nounwind {			define void @atomic_fetch_max16(i16 %x) nounwind {
				; X64-LABEL: atomic_fetch_max16
				; X32-LABEL: atomic_fetch_max16
	%t1 = atomicrmw max i16* @sc16, i16 %x acquire			%t1 = atomicrmw max i16* @sc16, i16 %x acquire
	; X64: movswl			; X64: movw
	; X64: movswl			; X64: movw
	; X64: subl			; X64: subw
	; X64: cmov			; X64: cmov
	; X64: lock			; X64: lock
	; X64: cmpxchgw			; X64: cmpxchgw

	; X32: movswl			; X32: movw
	; X32: movswl			; X32: movw
	; X32: subl			; X32: subw
	; X32: cmov			; X32: cmov
	; X32: lock			; X32: lock
	; X32: cmpxchgw			; X32: cmpxchgw
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_min16(i16 %x) nounwind {			define void @atomic_fetch_min16(i16 %x) nounwind {
				; X64-LABEL: atomic_fetch_min16
				; X32-LABEL: atomic_fetch_min16
	%t1 = atomicrmw min i16* @sc16, i16 %x acquire			%t1 = atomicrmw min i16* @sc16, i16 %x acquire
	; X64: movswl			; X64: movw
	; X64: movswl			; X64: movw
	; X64: subl			; X64: subw
	; X64: cmov			; X64: cmov
	; X64: lock			; X64: lock
	; X64: cmpxchgw			; X64: cmpxchgw

	; X32: movswl			; X32: movw
	; X32: movswl			; X32: movw
	; X32: subl			; X32: subw
	; X32: cmov			; X32: cmov
	; X32: lock			; X32: lock
	; X32: cmpxchgw			; X32: cmpxchgw
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_umax16(i16 %x) nounwind {			define void @atomic_fetch_umax16(i16 %x) nounwind {
				; X64-LABEL: atomic_fetch_umax16
				; X32-LABEL: atomic_fetch_umax16
	%t1 = atomicrmw umax i16* @sc16, i16 %x acquire			%t1 = atomicrmw umax i16* @sc16, i16 %x acquire
	; X64: movzwl			; X64: movw
	; X64: movzwl			; X64: movw
	; X64: subl			; X64: subw
	; X64: cmov			; X64: cmov
	; X64: lock			; X64: lock
	; X64: cmpxchgw			; X64: cmpxchgw

	; X32: movzwl			; X32: movw
	; X32: movzwl			; X32: movw
	; X32: subl			; X32: subw
	; X32: cmov			; X32: cmov
	; X32: lock			; X32: lock
	; X32: cmpxchgw			; X32: cmpxchgw
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_umin16(i16 %x) nounwind {			define void @atomic_fetch_umin16(i16 %x) nounwind {
				; X64-LABEL: atomic_fetch_umin16
				; X32-LABEL: atomic_fetch_umin16
	%t1 = atomicrmw umin i16* @sc16, i16 %x acquire			%t1 = atomicrmw umin i16* @sc16, i16 %x acquire
	; X64: movzwl			; X64: movw
	; X64: movzwl			; X64: movw
	; X64: subl			; X64: subw
	; X64: cmov			; X64: cmov
	; X64: lock			; X64: lock
	; X64: cmpxchgw			; X64: cmpxchgw

	; X32: movzwl			; X32: movw
	; X32: movzwl			; X32: movw
	; X32: subl			; X32: subw
	; X32: cmov			; X32: cmov
	; X32: lock			; X32: lock
	; X32: cmpxchgw			; X32: cmpxchgw
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	Show All 32 Lines

llvm/trunk/test/CodeGen/X86/atomic8.ll

	Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_max8(i8 %x) nounwind {			define void @atomic_fetch_max8(i8 %x) nounwind {
	; X64-LABEL: atomic_fetch_max8:			; X64-LABEL: atomic_fetch_max8:
	; X32-LABEL: atomic_fetch_max8:			; X32-LABEL: atomic_fetch_max8:
	%t1 = atomicrmw max i8* @sc8, i8 %x acquire			%t1 = atomicrmw max i8* @sc8, i8 %x acquire
	; X64: movsbl			; X64: movb
	; X64: movsbl			; X64: movb
	; X64: subl			; X64: subb
	; X64: lock			; X64: lock
	; X64: cmpxchgb			; X64: cmpxchgb

	; X32: movsbl			; X32: movb
	; X32: movsbl			; X32: movb
	; X32: subl			; X32: subb
	; X32: lock			; X32: lock
	; X32: cmpxchgb			; X32: cmpxchgb
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_min8(i8 %x) nounwind {			define void @atomic_fetch_min8(i8 %x) nounwind {
	; X64-LABEL: atomic_fetch_min8:			; X64-LABEL: atomic_fetch_min8:
	; X32-LABEL: atomic_fetch_min8:			; X32-LABEL: atomic_fetch_min8:
	%t1 = atomicrmw min i8* @sc8, i8 %x acquire			%t1 = atomicrmw min i8* @sc8, i8 %x acquire
	; X64: movsbl			; X64: movb
	; X64: movsbl			; X64: movb
	; X64: subl			; X64: subb
	; X64: lock			; X64: lock
	; X64: cmpxchgb			; X64: cmpxchgb

	; X32: movsbl			; X32: movb
	; X32: movsbl			; X32: movb
	; X32: subl			; X32: subb
	; X32: lock			; X32: lock
	; X32: cmpxchgb			; X32: cmpxchgb
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_umax8(i8 %x) nounwind {			define void @atomic_fetch_umax8(i8 %x) nounwind {
	; X64-LABEL: atomic_fetch_umax8:			; X64-LABEL: atomic_fetch_umax8:
	; X32-LABEL: atomic_fetch_umax8:			; X32-LABEL: atomic_fetch_umax8:
	%t1 = atomicrmw umax i8* @sc8, i8 %x acquire			%t1 = atomicrmw umax i8* @sc8, i8 %x acquire
	; X64: movzbl			; X64: movb
	; X64: movzbl			; X64: movb
	; X64: subl			; X64: subb
	; X64: lock			; X64: lock
	; X64: cmpxchgb			; X64: cmpxchgb

	; X32: movzbl			; X32: movb
	; X32: movzbl			; X32: movb
	; X32: subl			; X32: subb
	; X32: lock			; X32: lock
	; X32: cmpxchgb			; X32: cmpxchgb
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_umin8(i8 %x) nounwind {			define void @atomic_fetch_umin8(i8 %x) nounwind {
	; X64-LABEL: atomic_fetch_umin8:			; X64-LABEL: atomic_fetch_umin8:
	; X32-LABEL: atomic_fetch_umin8:			; X32-LABEL: atomic_fetch_umin8:
	%t1 = atomicrmw umin i8* @sc8, i8 %x acquire			%t1 = atomicrmw umin i8* @sc8, i8 %x acquire
	; X64: movzbl			; X64: movb
	; X64: movzbl			; X64: movb
	; X64: subl			; X64: subb
	; X64: lock			; X64: lock
	; X64: cmpxchgb			; X64: cmpxchgb

	; X32: movzbl			; X32: movb
	; X32: movzbl			; X32: movb
	; X32: subl			; X32: subb
	; X32: lock			; X32: lock
	; X32: cmpxchgb			; X32: cmpxchgb
	ret void			ret void
	; X64: ret			; X64: ret
	; X32: ret			; X32: ret
	}			}

	define void @atomic_fetch_cmpxchg8() nounwind {			define void @atomic_fetch_cmpxchg8() nounwind {
	Show All 37 Lines

llvm/trunk/test/CodeGen/X86/ctpop-combine.ll

	Show All 29 Lines

	define i32 @test3(i64 %x) nounwind readnone {			define i32 @test3(i64 %x) nounwind readnone {
	%count = tail call i64 @llvm.ctpop.i64(i64 %x)			%count = tail call i64 @llvm.ctpop.i64(i64 %x)
	%cast = trunc i64 %count to i6 ; Too small for 0-64			%cast = trunc i64 %count to i6 ; Too small for 0-64
	%cmp = icmp ult i6 %cast, 2			%cmp = icmp ult i6 %cast, 2
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	; CHECK-LABEL: test3:			; CHECK-LABEL: test3:
	; CHECK: cmpl $2			; CHECK: cmpb $2
	; CHECK: ret			; CHECK: ret
	}			}

llvm/trunk/test/CodeGen/X86/machine-sink-and-implicit-null-checks.ll

	Show First 20 Lines • Show All 61 Lines • ▼ Show 20 Lines
	}			}

	; Check that we have two implicit null checks in @f			; Check that we have two implicit null checks in @f

	; CHECK: __LLVM_FaultMaps:			; CHECK: __LLVM_FaultMaps:
	; CHECK-NEXT: .byte 1			; CHECK-NEXT: .byte 1
	; CHECK-NEXT: .byte 0			; CHECK-NEXT: .byte 0
	; CHECK-NEXT: .short 0			; CHECK-NEXT: .short 0
	; CHECK-NEXT: .long 2			; CHECK-NEXT: .long 1

	; FunctionInfo[0] =			; FunctionInfo[0] =

	; FunctionAddress =			; FunctionAddress =
	; CHECK-NEXT: .quad _f			; CHECK-NEXT: .quad _f

	; NumFaultingPCs =			; NumFaultingPCs =
	; CHECK-NEXT: .long 2			; CHECK-NEXT: .long 2

	; Reserved =			; Reserved =
	; CHECK-NEXT: .long 0			; CHECK-NEXT: .long 0

	!0 = !{}			!0 = !{}

llvm/trunk/test/CodeGen/X86/memcmp.ll

	Show All 17 Lines
	bb: ; preds = %entry			bb: ; preds = %entry
	store i32 4, i32* %P, align 4			store i32 4, i32* %P, align 4
	ret void			ret void

	return: ; preds = %entry			return: ; preds = %entry
	ret void			ret void
	; CHECK-LABEL: memcmp2:			; CHECK-LABEL: memcmp2:
	; CHECK: movzwl			; CHECK: movzwl
	; CHECK-NEXT: movzwl			; CHECK-NEXT: cmpw
	; CHECK-NEXT: cmpl
	; NOBUILTIN-LABEL: memcmp2:			; NOBUILTIN-LABEL: memcmp2:
	; NOBUILTIN: callq			; NOBUILTIN: callq
	}			}

	define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {			define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind {
	entry:			entry:
	%0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]			%0 = tail call i32 (...) @memcmp(i8* %X, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 1), i32 2) nounwind ; <i32> [#uses=1]
	%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]			%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
	br i1 %1, label %return, label %bb			br i1 %1, label %return, label %bb

	bb: ; preds = %entry			bb: ; preds = %entry
	store i32 4, i32* %P, align 4			store i32 4, i32* %P, align 4
	ret void			ret void

	return: ; preds = %entry			return: ; preds = %entry
	ret void			ret void
	; CHECK-LABEL: memcmp2a:			; CHECK-LABEL: memcmp2a:
	; CHECK: movzwl			; CHECK: cmpw $28527, (%
	; CHECK-NEXT: cmpl $28527,
	}			}


	define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {			define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind {
	entry:			entry:
	%0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]			%0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 4) nounwind ; <i32> [#uses=1]
	%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]			%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
	br i1 %1, label %return, label %bb			br i1 %1, label %return, label %bb
	▲ Show 20 Lines • Show All 62 Lines • Show Last 20 Lines

llvm/trunk/test/CodeGen/X86/pr5145.ll

	; RUN: llc -march=x86-64 < %s \| FileCheck %s			; RUN: llc -march=x86-64 < %s \| FileCheck %s
	@sc8 = external global i8			@sc8 = external global i8

	define void @atomic_maxmin_i8() {			define void @atomic_maxmin_i8() {
	; CHECK: atomic_maxmin_i8			; CHECK: atomic_maxmin_i8
	%1 = atomicrmw max i8* @sc8, i8 5 acquire			%1 = atomicrmw max i8* @sc8, i8 5 acquire
	; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:			; CHECK: [[LABEL1:\.?LBB[0-9]+_[0-9]+]]:
	; CHECK: movsbl			; CHECK: cmpb
	; CHECK: cmpl			; CHECK: jg
	; CHECK: lock cmpxchgb			; CHECK: lock cmpxchgb
	; CHECK: jne [[LABEL1]]			; CHECK: jne [[LABEL1]]
	%2 = atomicrmw min i8* @sc8, i8 6 acquire			%2 = atomicrmw min i8* @sc8, i8 6 acquire
	; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:			; CHECK: [[LABEL3:\.?LBB[0-9]+_[0-9]+]]:
	; CHECK: movsbl			; CHECK: cmpb
	; CHECK: cmpl			; CHECK: jl
	; CHECK: lock cmpxchgb			; CHECK: lock cmpxchgb
	; CHECK: jne [[LABEL3]]			; CHECK: jne [[LABEL3]]
	%3 = atomicrmw umax i8* @sc8, i8 7 acquire			%3 = atomicrmw umax i8* @sc8, i8 7 acquire
	; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:			; CHECK: [[LABEL5:\.?LBB[0-9]+_[0-9]+]]:
	; CHECK: movzbl			; CHECK: cmpb
	; CHECK: cmpl			; CHECK: ja
	; CHECK: lock cmpxchgb			; CHECK: lock cmpxchgb
	; CHECK: jne [[LABEL5]]			; CHECK: jne [[LABEL5]]
	%4 = atomicrmw umin i8* @sc8, i8 8 acquire			%4 = atomicrmw umin i8* @sc8, i8 8 acquire
	; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:			; CHECK: [[LABEL7:\.?LBB[0-9]+_[0-9]+]]:
	; CHECK: movzbl			; CHECK: cmpb
	; CHECK: cmpl			; CHECK: jb
	; CHECK: lock cmpxchgb			; CHECK: lock cmpxchgb
	; CHECK: jne [[LABEL7]]			; CHECK: jne [[LABEL7]]
	ret void			ret void
	}			}

llvm/trunk/test/CodeGen/X86/x86-shrink-wrapping.ll

	Show First 20 Lines • Show All 514 Lines • ▼ Show 20 Lines
	declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)			declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)

	; CHECK-LABEL: useLEA:			; CHECK-LABEL: useLEA:
	; DISABLE: pushq			; DISABLE: pushq
	;			;
	; CHECK: testq %rdi, %rdi			; CHECK: testq %rdi, %rdi
	; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]			; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
	;			;
	; CHECK: movzwl (%rdi), [[BF_LOAD:%e[a-z]+]]			; CHECK: cmpw $66, (%rdi)
	; CHECK-NEXT: cmpl $66, [[BF_LOAD]]
	; CHECK-NEXT: jne [[CLEANUP]]			; CHECK-NEXT: jne [[CLEANUP]]
	;			;
	; CHECK: movq 8(%rdi), %rdi			; CHECK: movq 8(%rdi), %rdi
	; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]			; CHECK-NEXT: movzwl (%rdi), %e[[BF_LOAD2:[a-z]+]]
	; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]			; CHECK-NEXT: leal -54(%r[[BF_LOAD2]]), [[TMP:%e[a-z]+]]
	; CHECK-NEXT: cmpl $14, [[TMP]]			; CHECK-NEXT: cmpl $14, [[TMP]]
	; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]			; CHECK-NEXT: ja [[LOR_LHS_FALSE:LBB[0-9_]+]]
	;			;
	▲ Show 20 Lines • Show All 452 Lines • Show Last 20 Lines