Diff 247879

llvm/lib/CodeGen/ExpandMemCmp.cpp

Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	class MemCmpExpansion {
void emitLoadCompareBlock(unsigned BlockIndex);		void emitLoadCompareBlock(unsigned BlockIndex);
void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,		void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
unsigned &LoadIndex);		unsigned &LoadIndex);
void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);		void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
void emitMemCmpResultBlock();		void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase();		Value *getMemCmpExpansionZeroCase();
Value *getMemCmpEqZeroOneBlock();		Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();		Value *getMemCmpOneBlock();
Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,		struct LoadPair {
uint64_t OffsetBytes);		Value *Lhs = nullptr;
		Value *Rhs = nullptr;
		};
		LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
		unsigned OffsetBytes);

static LoadEntryVector		static LoadEntryVector
computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,		computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);		unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
static LoadEntryVector		static LoadEntryVector
computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,		computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
unsigned MaxNumLoads,		unsigned MaxNumLoads,
unsigned &NumLoadsNonOneByte);		unsigned &NumLoadsNonOneByte);
▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines	void MemCmpExpansion::createLoadCmpBlocks() {
}		}
}		}

void MemCmpExpansion::createResultBlock() {		void MemCmpExpansion::createResultBlock() {
ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",		ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
EndBlock->getParent(), EndBlock);		EndBlock->getParent(), EndBlock);
}		}

/// Return a pointer to an element of type `LoadSizeType` at offset		MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
/// `OffsetBytes`.		bool NeedsBSwap,
Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,		Type *CmpSizeType,
Type *LoadSizeType,		unsigned OffsetBytes) {
uint64_t OffsetBytes) {		const auto MakeValue = [this, LoadSizeType, NeedsBSwap, CmpSizeType,
		OffsetBytes](int Index) {
		// Get the memory source at offset `OffsetBytes`.
		Value *Source = CI->getArgOperand(Index);
if (OffsetBytes > 0) {		if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());		auto *ByteType = Type::getInt8Ty(CI->getContext());
Source = Builder.CreateConstGEP1_64(		Source = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),		ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
OffsetBytes);		OffsetBytes);
}		}
return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());		Source = Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());

		// Create a constant or a load from the source.
		Value *V = nullptr;
		if (auto *C = dyn_cast<Constant>(Source))
		V = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
		if (!V)
		V = Builder.CreateLoad(LoadSizeType, Source);

		// Swap bytes if required.
		if (NeedsBSwap) {
		Function *Bswap = Intrinsic::getDeclaration(
		CI->getModule(), Intrinsic::bswap, LoadSizeType);
		V = Builder.CreateCall(Bswap, V);
		}

		// Zero extend if required.
		if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType)
		V = Builder.CreateZExt(V, CmpSizeType);
		return V;
		};
		return {MakeValue(0), MakeValue(1)};
}		}

// This function creates the IR instructions for loading and comparing 1 byte.		// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters with the given		// It loads 1 byte from each source of the memcmp parameters with the given
// GEPIndex. It then subtracts the two loaded values and adds this result to the		// GEPIndex. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.		// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,		void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
unsigned OffsetBytes) {		unsigned OffsetBytes) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);		Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
Type *LoadSizeType = Type::getInt8Ty(CI->getContext());		const LoadPair Loads =
Value *Source1 =		getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);		Type::getInt32Ty(CI->getContext()), OffsetBytes);
Value *Source2 =		Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);

Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);

PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);		PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);

if (BlockIndex < (LoadCmpBlocks.size() - 1)) {		if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
// Early exit branch if difference found to EndBlock. Otherwise, continue to		// Early exit branch if difference found to EndBlock. Otherwise, continue to
// next LoadCmpBlock,		// next LoadCmpBlock,
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,		Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
ConstantInt::get(Diff->getType(), 0));		ConstantInt::get(Diff->getType(), 0));
Show All 30 Lines	Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
// If we have multiple loads per block, we need to generate a composite		// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or. The type for the combinations is the largest load		// comparison using xor+or. The type for the combinations is the largest load
// type.		// type.
IntegerType *const MaxLoadType =		IntegerType *const MaxLoadType =
NumLoads == 1 ? nullptr		NumLoads == 1 ? nullptr
: IntegerType::get(CI->getContext(), MaxLoadSize * 8);		: IntegerType::get(CI->getContext(), MaxLoadSize * 8);
for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {		for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];		const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
		const LoadPair Loads = getLoadPair(
IntegerType *LoadSizeType =		IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);		/*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);

Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
CurLoadEntry.Offset);
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
CurLoadEntry.Offset);

// Get a constant or load a value for each source address.
Value *LoadSrc1 = nullptr;
if (auto *Source1C = dyn_cast<Constant>(Source1))
LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
if (!LoadSrc1)
LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);

Value *LoadSrc2 = nullptr;
if (auto *Source2C = dyn_cast<Constant>(Source2))
LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
if (!LoadSrc2)
LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (NumLoads != 1) {		if (NumLoads != 1) {
if (LoadSizeType != MaxLoadType) {
LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}
// If we have multiple loads per block, we need to generate a composite		// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or.		// comparison using xor+or.
Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);		Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
Diff = Builder.CreateZExt(Diff, MaxLoadType);		Diff = Builder.CreateZExt(Diff, MaxLoadType);
XorList.push_back(Diff);		XorList.push_back(Diff);
} else {		} else {
// If there's only one load per block, we just compare the loaded values.		// If there's only one load per block, we just compare the loaded values.
Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);		Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
}		}
}		}

auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {		auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
std::vector<Value *> OutList;		std::vector<Value *> OutList;
for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {		for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);		Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
OutList.push_back(Or);		OutList.push_back(Or);
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {

Type *LoadSizeType =		Type *LoadSizeType =
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);		IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);		Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");		assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);		Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,		const LoadPair Loads =
CurLoadEntry.Offset);		getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
CurLoadEntry.Offset);		CurLoadEntry.Offset);

// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (DL.isLittleEndian()) {
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
}

if (LoadSizeType != MaxLoadType) {
LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}

// Add the loaded values to the phi nodes for calculating memcmp result only		// Add the loaded values to the phi nodes for calculating memcmp result only
// if result is not used in a zero equality.		// if result is not used in a zero equality.
if (!IsUsedForZeroCmp) {		if (!IsUsedForZeroCmp) {
ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]);		ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]);		ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
}		}

Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);		Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))		BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
? EndBlock		? EndBlock
: LoadCmpBlocks[BlockIndex + 1];		: LoadCmpBlocks[BlockIndex + 1];
// Early exit branch if difference found to ResultBlock. Otherwise, continue		// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.		// to next LoadCmpBlock or EndBlock.
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);		BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);		Builder.Insert(CmpBr);

▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
assert(LoadIndex == getNumLoads() && "some entries were not consumed");		assert(LoadIndex == getNumLoads() && "some entries were not consumed");
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));		return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}		}

/// A memcmp expansion that only has one block of load and compare can bypass		/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.		/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {		Value *MemCmpExpansion::getMemCmpOneBlock() {
Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);		Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
Value *Source1 = CI->getArgOperand(0);		bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
Value *Source2 = CI->getArgOperand(1);

// Cast source to LoadSizeType*.
if (Source1->getType() != LoadSizeType)
Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
if (Source2->getType() != LoadSizeType)
Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());

// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (DL.isLittleEndian() && Size != 1) {
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
}

if (Size < 4) {
// The i8 and i16 cases don't need compares. We zext the loaded values and		// The i8 and i16 cases don't need compares. We zext the loaded values and
// subtract them to get the suitable negative, zero, or positive i32 result.		// subtract them to get the suitable negative, zero, or positive i32 result.
LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());		if (Size < 4) {
LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());		const LoadPair Loads =
return Builder.CreateSub(LoadSrc1, LoadSrc2);		getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
		/*Offset*/ 0);
		return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
}		}

		const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
		/*Offset*/ 0);
// The result of memcmp is negative, zero, or positive, so produce that by		// The result of memcmp is negative, zero, or positive, so produce that by
// subtracting 2 extended compare bits: sub (ugt, ult).		// subtracting 2 extended compare bits: sub (ugt, ult).
// If a target prefers to use selects to get -1/0/1, they should be able		// If a target prefers to use selects to get -1/0/1, they should be able
// to transform this later. The inverse transform (going from selects to math)		// to transform this later. The inverse transform (going from selects to math)
// may not be possible in the DAG because the selects got converted into		// may not be possible in the DAG because the selects got converted into
// branches before we got there.		// branches before we got there.
Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);		Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);		Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());		Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());		Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
return Builder.CreateSub(ZextUGT, ZextULT);		return Builder.CreateSub(ZextUGT, ZextULT);
}		}

// This function expands the memcmp call into an inline expansion and returns		// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.		// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {		Value *MemCmpExpansion::getMemCmpExpansion() {
▲ Show 20 Lines • Show All 275 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

Show First 20 Lines • Show All 229 Lines • ▼ Show 20 Lines

define i32 @length3(i8* %X, i8* %Y) nounwind {		define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length3:		; X86-LABEL: length3:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx		; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx		; X86-NEXT: rolw $8, %dx
		; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %si		; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx		; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB9_1		; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax		; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx		; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB9_1: # %res_block		; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length3:		; X64-LABEL: length3:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
		; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %cx		; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax		; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB9_1		; X64-NEXT: jne .LBB9_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax		; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx		; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
▲ Show 20 Lines • Show All 176 Lines • ▼ Show 20 Lines

define i32 @length5(i8* %X, i8* %Y) nounwind {		define i32 @length5(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5:		; X86-LABEL: length5:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB16_1		; X86-NEXT: jne .LBB16_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB16_1: # %res_block		; X86-NEXT: .LBB16_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5:		; X64-LABEL: length5:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB16_1		; X64-NEXT: jne .LBB16_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 37 Lines

define i1 @length5_lt(i8* %X, i8* %Y) nounwind {		define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5_lt:		; X86-LABEL: length5_lt:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB18_1		; X86-NEXT: jne .LBB18_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB18_3		; X86-NEXT: jmp .LBB18_3
; X86-NEXT: .LBB18_1: # %res_block		; X86-NEXT: .LBB18_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB18_3: # %endblock		; X86-NEXT: .LBB18_3: # %endblock
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5_lt:		; X64-LABEL: length5_lt:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB18_1		; X64-NEXT: jne .LBB18_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
Show All 40 Lines

define i32 @length8(i8* %X, i8* %Y) nounwind {		define i32 @length8(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length8:		; X86-LABEL: length8:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB20_2		; X86-NEXT: jne .LBB20_2
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB20_3		; X86-NEXT: je .LBB20_3
; X86-NEXT: .LBB20_2: # %res_block		; X86-NEXT: .LBB20_2: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
▲ Show 20 Lines • Show All 198 Lines • ▼ Show 20 Lines

define i32 @length12(i8* %X, i8* %Y) nounwind {		define i32 @length12(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length12:		; X86-LABEL: length12:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB27_3		; X86-NEXT: jne .LBB27_3
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB27_3		; X86-NEXT: jne .LBB27_3
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %ecx		; X86-NEXT: movl 8(%esi), %ecx
; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB27_4		; X86-NEXT: je .LBB27_4
; X86-NEXT: .LBB27_3: # %res_block		; X86-NEXT: .LBB27_3: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB27_4: # %endblock		; X86-NEXT: .LBB27_4: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length12:		; X64-LABEL: length12:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB27_2		; X64-NEXT: jne .LBB27_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx		; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB27_3		; X64-NEXT: je .LBB27_3
; X64-NEXT: .LBB27_2: # %res_block		; X64-NEXT: .LBB27_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329		; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

define i32 @length16(i8* %X, i8* %Y) nounwind {		define i32 @length16(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length16:		; X86-LABEL: length16:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%edx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl (%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB31_4		; X86-NEXT: jne .LBB31_4
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 4(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB31_4		; X86-NEXT: jne .LBB31_4
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %ecx		; X86-NEXT: movl 8(%edx), %ecx
; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 8(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB31_4		; X86-NEXT: jne .LBB31_4
; X86-NEXT: # %bb.3: # %loadbb3		; X86-NEXT: # %bb.3: # %loadbb3
; X86-NEXT: movl 12(%esi), %ecx		; X86-NEXT: movl 12(%edx), %ecx
; X86-NEXT: movl 12(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 12(%eax), %esi
		; X86-NEXT: bswapl %esi
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: je .LBB31_5		; X86-NEXT: je .LBB31_5
; X86-NEXT: .LBB31_4: # %res_block		; X86-NEXT: .LBB31_4: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB31_5: # %endblock		; X86-NEXT: .LBB31_5: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16:		; X64-LABEL: length16:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB31_2		; X64-NEXT: jne .LBB31_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB31_3		; X64-NEXT: je .LBB31_3
; X64-NEXT: .LBB31_2: # %res_block		; X64-NEXT: .LBB31_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines	; X64-MIC-AVX-NEXT: retq
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @length16_lt(i8* %x, i8* %y) nounwind {		define i1 @length16_lt(i8* %x, i8* %y) nounwind {
; X86-LABEL: length16_lt:		; X86-LABEL: length16_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%edx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl (%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB33_4		; X86-NEXT: jne .LBB33_4
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 4(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB33_4		; X86-NEXT: jne .LBB33_4
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %ecx		; X86-NEXT: movl 8(%edx), %ecx
; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 8(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB33_4		; X86-NEXT: jne .LBB33_4
; X86-NEXT: # %bb.3: # %loadbb3		; X86-NEXT: # %bb.3: # %loadbb3
; X86-NEXT: movl 12(%esi), %ecx		; X86-NEXT: movl 12(%edx), %ecx
; X86-NEXT: movl 12(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 12(%eax), %esi
		; X86-NEXT: bswapl %esi
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: je .LBB33_5		; X86-NEXT: je .LBB33_5
; X86-NEXT: .LBB33_4: # %res_block		; X86-NEXT: .LBB33_4: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB33_5: # %endblock		; X86-NEXT: .LBB33_5: # %endblock
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_lt:		; X64-LABEL: length16_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB33_2		; X64-NEXT: jne .LBB33_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB33_3		; X64-NEXT: je .LBB33_3
; X64-NEXT: .LBB33_2: # %res_block		; X64-NEXT: .LBB33_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax		; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB33_3: # %endblock		; X64-NEXT: .LBB33_3: # %endblock
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind		%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
%cmp = icmp slt i32 %call, 0		%cmp = icmp slt i32 %call, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @length16_gt(i8* %x, i8* %y) nounwind {		define i1 @length16_gt(i8* %x, i8* %y) nounwind {
; X86-LABEL: length16_gt:		; X86-LABEL: length16_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx		; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl (%edx), %eax
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl (%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: jne .LBB34_4		; X86-NEXT: jne .LBB34_4
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %eax		; X86-NEXT: movl 4(%edx), %eax
; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl 4(%ecx), %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: jne .LBB34_4		; X86-NEXT: jne .LBB34_4
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %eax		; X86-NEXT: movl 8(%edx), %eax
; X86-NEXT: movl 8(%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl 8(%ecx), %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: jne .LBB34_4		; X86-NEXT: jne .LBB34_4
; X86-NEXT: # %bb.3: # %loadbb3		; X86-NEXT: # %bb.3: # %loadbb3
; X86-NEXT: movl 12(%esi), %eax		; X86-NEXT: movl 12(%edx), %eax
; X86-NEXT: movl 12(%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl 12(%ecx), %esi
; X86-NEXT: xorl %edx, %edx		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: xorl %ecx, %ecx
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: je .LBB34_5		; X86-NEXT: je .LBB34_5
; X86-NEXT: .LBB34_4: # %res_block		; X86-NEXT: .LBB34_4: # %res_block
; X86-NEXT: xorl %edx, %edx		; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: setae %dl		; X86-NEXT: setae %cl
; X86-NEXT: leal -1(%edx,%edx), %edx		; X86-NEXT: leal -1(%ecx,%ecx), %ecx
; X86-NEXT: .LBB34_5: # %endblock		; X86-NEXT: .LBB34_5: # %endblock
; X86-NEXT: testl %edx, %edx		; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_gt:		; X64-LABEL: length16_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB34_2		; X64-NEXT: jne .LBB34_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB34_3		; X64-NEXT: je .LBB34_3
; X64-NEXT: .LBB34_2: # %res_block		; X64-NEXT: .LBB34_2: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 115 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length24:		; X64-LABEL: length24:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB36_3		; X64-NEXT: jne .LBB36_3
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB36_3		; X64-NEXT: jne .LBB36_3
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB36_4		; X64-NEXT: je .LBB36_4
; X64-NEXT: .LBB36_3: # %res_block		; X64-NEXT: .LBB36_3: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length24_lt:		; X64-LABEL: length24_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB38_3		; X64-NEXT: jne .LBB38_3
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB38_3		; X64-NEXT: jne .LBB38_3
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB38_4		; X64-NEXT: je .LBB38_4
; X64-NEXT: .LBB38_3: # %res_block		; X64-NEXT: .LBB38_3: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 18 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length24_gt:		; X64-LABEL: length24_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB39_3		; X64-NEXT: jne .LBB39_3
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB39_3		; X64-NEXT: jne .LBB39_3
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rax		; X64-NEXT: movq 16(%rdi), %rax
; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB39_4		; X64-NEXT: je .LBB39_4
; X64-NEXT: .LBB39_3: # %res_block		; X64-NEXT: .LBB39_3: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 513 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length32:		; X64-LABEL: length32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB47_4		; X64-NEXT: jne .LBB47_4
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB47_4		; X64-NEXT: jne .LBB47_4
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB47_4		; X64-NEXT: jne .LBB47_4
; X64-NEXT: # %bb.3: # %loadbb3		; X64-NEXT: # %bb.3: # %loadbb3
; X64-NEXT: movq 24(%rdi), %rcx		; X64-NEXT: movq 24(%rdi), %rcx
; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB47_5		; X64-NEXT: je .LBB47_5
; X64-NEXT: .LBB47_4: # %res_block		; X64-NEXT: .LBB47_4: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length32_lt:		; X64-LABEL: length32_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB49_4		; X64-NEXT: jne .LBB49_4
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB49_4		; X64-NEXT: jne .LBB49_4
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB49_4		; X64-NEXT: jne .LBB49_4
; X64-NEXT: # %bb.3: # %loadbb3		; X64-NEXT: # %bb.3: # %loadbb3
; X64-NEXT: movq 24(%rdi), %rcx		; X64-NEXT: movq 24(%rdi), %rcx
; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB49_5		; X64-NEXT: je .LBB49_5
; X64-NEXT: .LBB49_4: # %res_block		; X64-NEXT: .LBB49_4: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 18 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length32_gt:		; X64-LABEL: length32_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB50_4		; X64-NEXT: jne .LBB50_4
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB50_4		; X64-NEXT: jne .LBB50_4
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rax		; X64-NEXT: movq 16(%rdi), %rax
; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB50_4		; X64-NEXT: jne .LBB50_4
; X64-NEXT: # %bb.3: # %loadbb3		; X64-NEXT: # %bb.3: # %loadbb3
; X64-NEXT: movq 24(%rdi), %rax		; X64-NEXT: movq 24(%rdi), %rax
; X64-NEXT: movq 24(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 24(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB50_5		; X64-NEXT: je .LBB50_5
; X64-NEXT: .LBB50_4: # %res_block		; X64-NEXT: .LBB50_4: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 3,784 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/memcmp-optsize.ll

	Show First 20 Lines • Show All 110 Lines • ▼ Show 20 Lines

	define i32 @length3(i8* %X, i8* %Y) nounwind optsize {			define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
	; X86-LABEL: length3:			; X86-LABEL: length3:
	; X86: # %bb.0: # %loadbb			; X86: # %bb.0: # %loadbb
	; X86-NEXT: pushl %esi			; X86-NEXT: pushl %esi
	; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx			; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: movzwl (%eax), %edx			; X86-NEXT: movzwl (%eax), %edx
	; X86-NEXT: movzwl (%ecx), %esi
	; X86-NEXT: rolw $8, %dx			; X86-NEXT: rolw $8, %dx
				; X86-NEXT: movzwl (%ecx), %esi
	; X86-NEXT: rolw $8, %si			; X86-NEXT: rolw $8, %si
	; X86-NEXT: cmpw %si, %dx			; X86-NEXT: cmpw %si, %dx
	; X86-NEXT: jne .LBB4_1			; X86-NEXT: jne .LBB4_1
	; X86-NEXT: # %bb.2: # %loadbb1			; X86-NEXT: # %bb.2: # %loadbb1
	; X86-NEXT: movzbl 2(%eax), %eax			; X86-NEXT: movzbl 2(%eax), %eax
	; X86-NEXT: movzbl 2(%ecx), %ecx			; X86-NEXT: movzbl 2(%ecx), %ecx
	; X86-NEXT: subl %ecx, %eax			; X86-NEXT: subl %ecx, %eax
	; X86-NEXT: jmp .LBB4_3			; X86-NEXT: jmp .LBB4_3
	; X86-NEXT: .LBB4_1: # %res_block			; X86-NEXT: .LBB4_1: # %res_block
	; X86-NEXT: setae %al			; X86-NEXT: setae %al
	; X86-NEXT: movzbl %al, %eax			; X86-NEXT: movzbl %al, %eax
	; X86-NEXT: leal -1(%eax,%eax), %eax			; X86-NEXT: leal -1(%eax,%eax), %eax
	; X86-NEXT: .LBB4_3: # %endblock			; X86-NEXT: .LBB4_3: # %endblock
	; X86-NEXT: popl %esi			; X86-NEXT: popl %esi
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: length3:			; X64-LABEL: length3:
	; X64: # %bb.0: # %loadbb			; X64: # %bb.0: # %loadbb
	; X64-NEXT: movzwl (%rdi), %eax			; X64-NEXT: movzwl (%rdi), %eax
	; X64-NEXT: movzwl (%rsi), %ecx
	; X64-NEXT: rolw $8, %ax			; X64-NEXT: rolw $8, %ax
				; X64-NEXT: movzwl (%rsi), %ecx
	; X64-NEXT: rolw $8, %cx			; X64-NEXT: rolw $8, %cx
	; X64-NEXT: cmpw %cx, %ax			; X64-NEXT: cmpw %cx, %ax
	; X64-NEXT: jne .LBB4_1			; X64-NEXT: jne .LBB4_1
	; X64-NEXT: # %bb.2: # %loadbb1			; X64-NEXT: # %bb.2: # %loadbb1
	; X64-NEXT: movzbl 2(%rdi), %eax			; X64-NEXT: movzbl 2(%rdi), %eax
	; X64-NEXT: movzbl 2(%rsi), %ecx			; X64-NEXT: movzbl 2(%rsi), %ecx
	; X64-NEXT: subl %ecx, %eax			; X64-NEXT: subl %ecx, %eax
	; X64-NEXT: retq			; X64-NEXT: retq
	▲ Show 20 Lines • Show All 106 Lines • ▼ Show 20 Lines

	define i32 @length5(i8* %X, i8* %Y) nounwind optsize {			define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
	; X86-LABEL: length5:			; X86-LABEL: length5:
	; X86: # %bb.0: # %loadbb			; X86: # %bb.0: # %loadbb
	; X86-NEXT: pushl %esi			; X86-NEXT: pushl %esi
	; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx			; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: movl (%eax), %edx			; X86-NEXT: movl (%eax), %edx
	; X86-NEXT: movl (%ecx), %esi
	; X86-NEXT: bswapl %edx			; X86-NEXT: bswapl %edx
				; X86-NEXT: movl (%ecx), %esi
	; X86-NEXT: bswapl %esi			; X86-NEXT: bswapl %esi
	; X86-NEXT: cmpl %esi, %edx			; X86-NEXT: cmpl %esi, %edx
	; X86-NEXT: jne .LBB9_1			; X86-NEXT: jne .LBB9_1
	; X86-NEXT: # %bb.2: # %loadbb1			; X86-NEXT: # %bb.2: # %loadbb1
	; X86-NEXT: movzbl 4(%eax), %eax			; X86-NEXT: movzbl 4(%eax), %eax
	; X86-NEXT: movzbl 4(%ecx), %ecx			; X86-NEXT: movzbl 4(%ecx), %ecx
	; X86-NEXT: subl %ecx, %eax			; X86-NEXT: subl %ecx, %eax
	; X86-NEXT: jmp .LBB9_3			; X86-NEXT: jmp .LBB9_3
	; X86-NEXT: .LBB9_1: # %res_block			; X86-NEXT: .LBB9_1: # %res_block
	; X86-NEXT: setae %al			; X86-NEXT: setae %al
	; X86-NEXT: movzbl %al, %eax			; X86-NEXT: movzbl %al, %eax
	; X86-NEXT: leal -1(%eax,%eax), %eax			; X86-NEXT: leal -1(%eax,%eax), %eax
	; X86-NEXT: .LBB9_3: # %endblock			; X86-NEXT: .LBB9_3: # %endblock
	; X86-NEXT: popl %esi			; X86-NEXT: popl %esi
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: length5:			; X64-LABEL: length5:
	; X64: # %bb.0: # %loadbb			; X64: # %bb.0: # %loadbb
	; X64-NEXT: movl (%rdi), %eax			; X64-NEXT: movl (%rdi), %eax
	; X64-NEXT: movl (%rsi), %ecx
	; X64-NEXT: bswapl %eax			; X64-NEXT: bswapl %eax
				; X64-NEXT: movl (%rsi), %ecx
	; X64-NEXT: bswapl %ecx			; X64-NEXT: bswapl %ecx
	; X64-NEXT: cmpl %ecx, %eax			; X64-NEXT: cmpl %ecx, %eax
	; X64-NEXT: jne .LBB9_1			; X64-NEXT: jne .LBB9_1
	; X64-NEXT: # %bb.2: # %loadbb1			; X64-NEXT: # %bb.2: # %loadbb1
	; X64-NEXT: movzbl 4(%rdi), %eax			; X64-NEXT: movzbl 4(%rdi), %eax
	; X64-NEXT: movzbl 4(%rsi), %ecx			; X64-NEXT: movzbl 4(%rsi), %ecx
	; X64-NEXT: subl %ecx, %eax			; X64-NEXT: subl %ecx, %eax
	; X64-NEXT: retq			; X64-NEXT: retq
	Show All 37 Lines

	define i32 @length8(i8* %X, i8* %Y) nounwind optsize {			define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
	; X86-LABEL: length8:			; X86-LABEL: length8:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: pushl %esi			; X86-NEXT: pushl %esi
	; X86-NEXT: movl {{[0-9]+}}(%esp), %eax			; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
	; X86-NEXT: movl {{[0-9]+}}(%esp), %esi			; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
	; X86-NEXT: movl (%esi), %ecx			; X86-NEXT: movl (%esi), %ecx
	; X86-NEXT: movl (%eax), %edx
	; X86-NEXT: bswapl %ecx			; X86-NEXT: bswapl %ecx
				; X86-NEXT: movl (%eax), %edx
	; X86-NEXT: bswapl %edx			; X86-NEXT: bswapl %edx
	; X86-NEXT: cmpl %edx, %ecx			; X86-NEXT: cmpl %edx, %ecx
	; X86-NEXT: jne .LBB11_2			; X86-NEXT: jne .LBB11_2
	; X86-NEXT: # %bb.1: # %loadbb1			; X86-NEXT: # %bb.1: # %loadbb1
	; X86-NEXT: movl 4(%esi), %ecx			; X86-NEXT: movl 4(%esi), %ecx
	; X86-NEXT: movl 4(%eax), %edx
	; X86-NEXT: bswapl %ecx			; X86-NEXT: bswapl %ecx
				; X86-NEXT: movl 4(%eax), %edx
	; X86-NEXT: bswapl %edx			; X86-NEXT: bswapl %edx
	; X86-NEXT: xorl %eax, %eax			; X86-NEXT: xorl %eax, %eax
	; X86-NEXT: cmpl %edx, %ecx			; X86-NEXT: cmpl %edx, %ecx
	; X86-NEXT: je .LBB11_3			; X86-NEXT: je .LBB11_3
	; X86-NEXT: .LBB11_2: # %res_block			; X86-NEXT: .LBB11_2: # %res_block
	; X86-NEXT: xorl %eax, %eax			; X86-NEXT: xorl %eax, %eax
	; X86-NEXT: cmpl %edx, %ecx			; X86-NEXT: cmpl %edx, %ecx
	; X86-NEXT: setae %al			; X86-NEXT: setae %al
	▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines
	; X86-NEXT: pushl {{[0-9]+}}(%esp)			; X86-NEXT: pushl {{[0-9]+}}(%esp)
	; X86-NEXT: calll memcmp			; X86-NEXT: calll memcmp
	; X86-NEXT: addl $16, %esp			; X86-NEXT: addl $16, %esp
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: length12:			; X64-LABEL: length12:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movq (%rdi), %rcx			; X64-NEXT: movq (%rdi), %rcx
	; X64-NEXT: movq (%rsi), %rdx
	; X64-NEXT: bswapq %rcx			; X64-NEXT: bswapq %rcx
				; X64-NEXT: movq (%rsi), %rdx
	; X64-NEXT: bswapq %rdx			; X64-NEXT: bswapq %rdx
	; X64-NEXT: cmpq %rdx, %rcx			; X64-NEXT: cmpq %rdx, %rcx
	; X64-NEXT: jne .LBB15_2			; X64-NEXT: jne .LBB15_2
	; X64-NEXT: # %bb.1: # %loadbb1			; X64-NEXT: # %bb.1: # %loadbb1
	; X64-NEXT: movl 8(%rdi), %ecx			; X64-NEXT: movl 8(%rdi), %ecx
	; X64-NEXT: movl 8(%rsi), %edx
	; X64-NEXT: bswapl %ecx			; X64-NEXT: bswapl %ecx
				; X64-NEXT: movl 8(%rsi), %edx
	; X64-NEXT: bswapl %edx			; X64-NEXT: bswapl %edx
	; X64-NEXT: xorl %eax, %eax			; X64-NEXT: xorl %eax, %eax
	; X64-NEXT: cmpq %rdx, %rcx			; X64-NEXT: cmpq %rdx, %rcx
	; X64-NEXT: je .LBB15_3			; X64-NEXT: je .LBB15_3
	; X64-NEXT: .LBB15_2: # %res_block			; X64-NEXT: .LBB15_2: # %res_block
	; X64-NEXT: xorl %eax, %eax			; X64-NEXT: xorl %eax, %eax
	; X64-NEXT: cmpq %rdx, %rcx			; X64-NEXT: cmpq %rdx, %rcx
	; X64-NEXT: setae %al			; X64-NEXT: setae %al
	Show All 15 Lines
	; X86-NEXT: pushl {{[0-9]+}}(%esp)			; X86-NEXT: pushl {{[0-9]+}}(%esp)
	; X86-NEXT: calll memcmp			; X86-NEXT: calll memcmp
	; X86-NEXT: addl $16, %esp			; X86-NEXT: addl $16, %esp
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: length16:			; X64-LABEL: length16:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movq (%rdi), %rcx			; X64-NEXT: movq (%rdi), %rcx
	; X64-NEXT: movq (%rsi), %rdx
	; X64-NEXT: bswapq %rcx			; X64-NEXT: bswapq %rcx
				; X64-NEXT: movq (%rsi), %rdx
	; X64-NEXT: bswapq %rdx			; X64-NEXT: bswapq %rdx
	; X64-NEXT: cmpq %rdx, %rcx			; X64-NEXT: cmpq %rdx, %rcx
	; X64-NEXT: jne .LBB16_2			; X64-NEXT: jne .LBB16_2
	; X64-NEXT: # %bb.1: # %loadbb1			; X64-NEXT: # %bb.1: # %loadbb1
	; X64-NEXT: movq 8(%rdi), %rcx			; X64-NEXT: movq 8(%rdi), %rcx
	; X64-NEXT: movq 8(%rsi), %rdx
	; X64-NEXT: bswapq %rcx			; X64-NEXT: bswapq %rcx
				; X64-NEXT: movq 8(%rsi), %rdx
	; X64-NEXT: bswapq %rdx			; X64-NEXT: bswapq %rdx
	; X64-NEXT: xorl %eax, %eax			; X64-NEXT: xorl %eax, %eax
	; X64-NEXT: cmpq %rdx, %rcx			; X64-NEXT: cmpq %rdx, %rcx
	; X64-NEXT: je .LBB16_3			; X64-NEXT: je .LBB16_3
	; X64-NEXT: .LBB16_2: # %res_block			; X64-NEXT: .LBB16_2: # %res_block
	; X64-NEXT: xorl %eax, %eax			; X64-NEXT: xorl %eax, %eax
	; X64-NEXT: cmpq %rdx, %rcx			; X64-NEXT: cmpq %rdx, %rcx
	; X64-NEXT: setae %al			; X64-NEXT: setae %al
	▲ Show 20 Lines • Show All 527 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/memcmp-pgso.ll

Show All 10 Lines
@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1		@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8*, i8*, i64)		declare i32 @memcmp(i8*, i8*, i64)
declare i32 @bcmp(i8*, i8*, i64)		declare i32 @bcmp(i8*, i8*, i64)

define i32 @length2(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length2(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length2:		; X86-LABEL: length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2:		; X64-LABEL: length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i1 @length2_eq(i8* %X, i8* %Y) nounwind !prof !14 {		define i1 @length2_eq(i8* %X, i8* %Y) nounwind !prof !14 {
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines

define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length3:		; X86-LABEL: length3:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx		; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx		; X86-NEXT: rolw $8, %dx
		; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %si		; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx		; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1		; X86-NEXT: jne .LBB4_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax		; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx		; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3		; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block		; X86-NEXT: .LBB4_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_3: # %endblock		; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length3:		; X64-LABEL: length3:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
		; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %cx		; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax		; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1		; X64-NEXT: jne .LBB4_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax		; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx		; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 36 Lines
}		}

define i32 @length4(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length4(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length4:		; X86-LABEL: length4:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4:		; X64-LABEL: length4:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m		ret i32 %m
Show All 40 Lines

define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length5:		; X86-LABEL: length5:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB9_1		; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3		; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block		; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_3: # %endblock		; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5:		; X64-LABEL: length5:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1		; X64-NEXT: jne .LBB9_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 37 Lines

define i32 @length8(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length8(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length8:		; X86-LABEL: length8:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB11_2		; X86-NEXT: jne .LBB11_2
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3		; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_2: # %res_block		; X86-NEXT: .LBB11_2: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock		; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length8:		; X64-LABEL: length8:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m		ret i32 %m
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length12:		; X64-LABEL: length12:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_2		; X64-NEXT: jne .LBB15_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx		; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3		; X64-NEXT: je .LBB15_3
; X64-NEXT: .LBB15_2: # %res_block		; X64-NEXT: .LBB15_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 15 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16:		; X64-LABEL: length16:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_2		; X64-NEXT: jne .LBB16_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB16_3		; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block		; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 494 Lines • ▼ Show 20 Lines	; X64-AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
%c = icmp eq i32 %m, 0		%c = icmp eq i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: bcmp_length2:		; X86-LABEL: bcmp_length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bcmp_length2:		; X64-LABEL: bcmp_length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

!llvm.module.flags = !{!0}		!llvm.module.flags = !{!0}
Show All 15 Lines

llvm/test/CodeGen/X86/memcmp.ll

Show First 20 Lines • Show All 62 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i32 @length2(i8* %X, i8* %Y) nounwind {		define i32 @length2(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2:		; X86-LABEL: length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2:		; X64-LABEL: length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i32 @length2_const(i8* %X, i8* %Y) nounwind {		define i32 @length2_const(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_const:		; X86-LABEL: length2_const:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax		; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl .L.str+1, %ecx
; X86-NEXT: rolw $8, %ax		; X86-NEXT: rolw $8, %ax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
		courbetAuthorUnsubmitted Done Reply Inline Actions This is the real change. courbet: This is the real change.
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_const:		; X64-LABEL: length2_const:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i1 @length2_gt_const(i8* %X, i8* %Y) nounwind {		define i1 @length2_gt_const(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_gt_const:		; X86-LABEL: length2_gt_const:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax		; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl .L.str+1, %ecx
; X86-NEXT: rolw $8, %ax		; X86-NEXT: rolw $8, %ax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_gt_const:		; X64-LABEL: length2_gt_const:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax		; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
%c = icmp sgt i32 %m, 0		%c = icmp sgt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

Show All 16 Lines	; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp eq i32 %m, 0		%c = icmp eq i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length2_lt(i8* %X, i8* %Y) nounwind {		define i1 @length2_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_lt:		; X86-LABEL: length2_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_lt:		; X64-LABEL: length2_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length2_gt(i8* %X, i8* %Y) nounwind {		define i1 @length2_gt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_gt:		; X86-LABEL: length2_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: movzwl %cx, %ecx
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: subl %eax, %ecx		; X86-NEXT: subl %eax, %ecx
; X86-NEXT: testl %ecx, %ecx		; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_gt:		; X64-LABEL: length2_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax		; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp sgt i32 %m, 0		%c = icmp sgt i32 %m, 0
ret i1 %c		ret i1 %c
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines

define i32 @length3(i8* %X, i8* %Y) nounwind {		define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length3:		; X86-LABEL: length3:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx		; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx		; X86-NEXT: rolw $8, %dx
		; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %si		; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx		; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB11_1		; X86-NEXT: jne .LBB11_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax		; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx		; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB11_1: # %res_block		; X86-NEXT: .LBB11_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length3:		; X64-LABEL: length3:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
		; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %cx		; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax		; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB11_1		; X64-NEXT: jne .LBB11_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax		; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx		; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 36 Lines
}		}

define i32 @length4(i8* %X, i8* %Y) nounwind {		define i32 @length4(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4:		; X86-LABEL: length4:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4:		; X64-LABEL: length4:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m		ret i32 %m
Show All 21 Lines
}		}

define i1 @length4_lt(i8* %X, i8* %Y) nounwind {		define i1 @length4_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4_lt:		; X86-LABEL: length4_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4_lt:		; X64-LABEL: length4_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
		efriedmaUnsubmitted Not Done Reply Inline Actions The scheduling here seems to be worse? efriedma: The scheduling here seems to be worse?
		courbetAuthorUnsubmitted Done Reply Inline Actions I think we could argue either way: the new scheduling interleaves loads and other computations, evening out the port pressure and increasing compute parallelism. On the other hand, it's true that there is less data parallelism. It should not matter with recent out-of-order cores anyway. If you feel strongly about this I can go back to interleaving the data; this is actually orthogonal to the real change. courbet: I think we could argue either way: the new scheduling interleaves loads and other computations…
		efriedmaUnsubmitted Not Done Reply Inline Actions I'd prefer to avoid unrelated changes. > "the new scheduling interleaves loads and other computations" Scheduling an arithmetic operation that uses a loaded value immediately after the load is never going to work out; loads have latency. Granted, I agree it's unlikely to matter much on a modern x86 core. Really, I'm more surprised we aren't trying to do any scheduling at all after isel. efriedma: I'd prefer to avoid unrelated changes. > the new scheduling interleaves loads and other…
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length4_gt(i8* %X, i8* %Y) nounwind {		define i1 @length4_gt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4_gt:		; X86-LABEL: length4_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: xorl %edx, %edx		; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %eax, %ecx		; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: seta %dl		; X86-NEXT: seta %dl
; X86-NEXT: sbbl $0, %edx		; X86-NEXT: sbbl $0, %edx
; X86-NEXT: testl %edx, %edx		; X86-NEXT: testl %edx, %edx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4_gt:		; X64-LABEL: length4_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: seta %dl		; X64-NEXT: seta %dl
; X64-NEXT: sbbl $0, %edx		; X64-NEXT: sbbl $0, %edx
; X64-NEXT: testl %edx, %edx		; X64-NEXT: testl %edx, %edx
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
Show All 22 Lines

define i32 @length5(i8* %X, i8* %Y) nounwind {		define i32 @length5(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5:		; X86-LABEL: length5:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB18_1		; X86-NEXT: jne .LBB18_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB18_1: # %res_block		; X86-NEXT: .LBB18_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5:		; X64-LABEL: length5:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB18_1		; X64-NEXT: jne .LBB18_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 37 Lines

define i1 @length5_lt(i8* %X, i8* %Y) nounwind {		define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5_lt:		; X86-LABEL: length5_lt:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB20_1		; X86-NEXT: jne .LBB20_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB20_3		; X86-NEXT: jmp .LBB20_3
; X86-NEXT: .LBB20_1: # %res_block		; X86-NEXT: .LBB20_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB20_3: # %endblock		; X86-NEXT: .LBB20_3: # %endblock
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5_lt:		; X64-LABEL: length5_lt:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB20_1		; X64-NEXT: jne .LBB20_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
Show All 40 Lines

define i32 @length8(i8* %X, i8* %Y) nounwind {		define i32 @length8(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length8:		; X86-LABEL: length8:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB22_2		; X86-NEXT: jne .LBB22_2
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB22_3		; X86-NEXT: je .LBB22_3
; X86-NEXT: .LBB22_2: # %res_block		; X86-NEXT: .LBB22_2: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB22_3: # %endblock		; X86-NEXT: .LBB22_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length8:		; X64-LABEL: length8:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m		ret i32 %m
▲ Show 20 Lines • Show All 165 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length12:		; X64-LABEL: length12:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB29_2		; X64-NEXT: jne .LBB29_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx		; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB29_3		; X64-NEXT: je .LBB29_3
; X64-NEXT: .LBB29_2: # %res_block		; X64-NEXT: .LBB29_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 144 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16:		; X64-LABEL: length16:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB35_2		; X64-NEXT: jne .LBB35_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB35_3		; X64-NEXT: je .LBB35_3
; X64-NEXT: .LBB35_2: # %res_block		; X64-NEXT: .LBB35_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_lt:		; X64-LABEL: length16_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB37_2		; X64-NEXT: jne .LBB37_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB37_3		; X64-NEXT: je .LBB37_3
; X64-NEXT: .LBB37_2: # %res_block		; X64-NEXT: .LBB37_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 18 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_gt:		; X64-LABEL: length16_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB38_2		; X64-NEXT: jne .LBB38_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB38_3		; X64-NEXT: je .LBB38_3
; X64-NEXT: .LBB38_2: # %res_block		; X64-NEXT: .LBB38_2: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 3,566 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[ExpandMemCmp] Properly constant-fold all compares.
ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 247879

llvm/lib/CodeGen/ExpandMemCmp.cpp

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

llvm/test/CodeGen/X86/memcmp-optsize.ll

llvm/test/CodeGen/X86/memcmp-pgso.ll

llvm/test/CodeGen/X86/memcmp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ExpandMemCmp] Properly constant-fold all compares.ClosedPublic

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 247879

llvm/lib/CodeGen/ExpandMemCmp.cpp

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

llvm/test/CodeGen/X86/memcmp-optsize.ll

llvm/test/CodeGen/X86/memcmp-pgso.ll

llvm/test/CodeGen/X86/memcmp.ll

[ExpandMemCmp] Properly constant-fold all compares.
ClosedPublic