Diff 247895

llvm/lib/CodeGen/ExpandMemCmp.cpp

Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	class MemCmpExpansion {
void emitLoadCompareBlock(unsigned BlockIndex);		void emitLoadCompareBlock(unsigned BlockIndex);
void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,		void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
unsigned &LoadIndex);		unsigned &LoadIndex);
void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);		void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
void emitMemCmpResultBlock();		void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase();		Value *getMemCmpExpansionZeroCase();
Value *getMemCmpEqZeroOneBlock();		Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();		Value *getMemCmpOneBlock();
Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,		struct LoadPair {
uint64_t OffsetBytes);		Value *Lhs = nullptr;
		Value *Rhs = nullptr;
		};
		LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
		unsigned OffsetBytes);

static LoadEntryVector		static LoadEntryVector
computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,		computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);		unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
static LoadEntryVector		static LoadEntryVector
computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,		computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
unsigned MaxNumLoads,		unsigned MaxNumLoads,
unsigned &NumLoadsNonOneByte);		unsigned &NumLoadsNonOneByte);
▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines	void MemCmpExpansion::createLoadCmpBlocks() {
}		}
}		}

void MemCmpExpansion::createResultBlock() {		void MemCmpExpansion::createResultBlock() {
ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",		ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
EndBlock->getParent(), EndBlock);		EndBlock->getParent(), EndBlock);
}		}

/// Return a pointer to an element of type `LoadSizeType` at offset		MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
/// `OffsetBytes`.		bool NeedsBSwap,
Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,		Type *CmpSizeType,
Type *LoadSizeType,		unsigned OffsetBytes) {
uint64_t OffsetBytes) {		const auto MakeValue = [this, LoadSizeType, NeedsBSwap, CmpSizeType,
		OffsetBytes](int Index) {
		// Get the memory source at offset `OffsetBytes`.
		Value *Source = CI->getArgOperand(Index);
if (OffsetBytes > 0) {		if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());		auto *ByteType = Type::getInt8Ty(CI->getContext());
Source = Builder.CreateConstGEP1_64(		Source = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),		ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
OffsetBytes);		OffsetBytes);
}		}
return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());		Source = Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());

		// Create a constant or a load from the source.
		Value *V = nullptr;
		if (auto *C = dyn_cast<Constant>(Source))
		V = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
		if (!V)
		V = Builder.CreateLoad(LoadSizeType, Source);

		// Swap bytes if required.
		if (NeedsBSwap) {
		Function *Bswap = Intrinsic::getDeclaration(
		CI->getModule(), Intrinsic::bswap, LoadSizeType);
		V = Builder.CreateCall(Bswap, V);
		}

		// Zero extend if required.
		if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType)
		V = Builder.CreateZExt(V, CmpSizeType);
		return V;
		};
		return {MakeValue(0), MakeValue(1)};
}		}

// This function creates the IR instructions for loading and comparing 1 byte.		// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters with the given		// It loads 1 byte from each source of the memcmp parameters with the given
// GEPIndex. It then subtracts the two loaded values and adds this result to the		// GEPIndex. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.		// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,		void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
unsigned OffsetBytes) {		unsigned OffsetBytes) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);		Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
Type *LoadSizeType = Type::getInt8Ty(CI->getContext());		const LoadPair Loads =
Value *Source1 =		getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);		Type::getInt32Ty(CI->getContext()), OffsetBytes);
Value *Source2 =		Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);

Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);

PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);		PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);

if (BlockIndex < (LoadCmpBlocks.size() - 1)) {		if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
// Early exit branch if difference found to EndBlock. Otherwise, continue to		// Early exit branch if difference found to EndBlock. Otherwise, continue to
// next LoadCmpBlock,		// next LoadCmpBlock,
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,		Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
ConstantInt::get(Diff->getType(), 0));		ConstantInt::get(Diff->getType(), 0));
Show All 30 Lines	Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
// If we have multiple loads per block, we need to generate a composite		// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or. The type for the combinations is the largest load		// comparison using xor+or. The type for the combinations is the largest load
// type.		// type.
IntegerType *const MaxLoadType =		IntegerType *const MaxLoadType =
NumLoads == 1 ? nullptr		NumLoads == 1 ? nullptr
: IntegerType::get(CI->getContext(), MaxLoadSize * 8);		: IntegerType::get(CI->getContext(), MaxLoadSize * 8);
for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {		for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];		const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
		const LoadPair Loads = getLoadPair(
IntegerType *LoadSizeType =		IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);		/*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);

Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
CurLoadEntry.Offset);
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
CurLoadEntry.Offset);

// Get a constant or load a value for each source address.
Value *LoadSrc1 = nullptr;
if (auto *Source1C = dyn_cast<Constant>(Source1))
LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
if (!LoadSrc1)
LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);

Value *LoadSrc2 = nullptr;
if (auto *Source2C = dyn_cast<Constant>(Source2))
LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
if (!LoadSrc2)
LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (NumLoads != 1) {		if (NumLoads != 1) {
if (LoadSizeType != MaxLoadType) {
LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}
// If we have multiple loads per block, we need to generate a composite		// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or.		// comparison using xor+or.
Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);		Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
Diff = Builder.CreateZExt(Diff, MaxLoadType);		Diff = Builder.CreateZExt(Diff, MaxLoadType);
XorList.push_back(Diff);		XorList.push_back(Diff);
} else {		} else {
// If there's only one load per block, we just compare the loaded values.		// If there's only one load per block, we just compare the loaded values.
Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);		Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
}		}
}		}

auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {		auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
std::vector<Value *> OutList;		std::vector<Value *> OutList;
for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {		for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);		Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
OutList.push_back(Or);		OutList.push_back(Or);
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {

Type *LoadSizeType =		Type *LoadSizeType =
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);		IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);		Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");		assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);		Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,		const LoadPair Loads =
CurLoadEntry.Offset);		getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
CurLoadEntry.Offset);		CurLoadEntry.Offset);

// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (DL.isLittleEndian()) {
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
}

if (LoadSizeType != MaxLoadType) {
LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}

// Add the loaded values to the phi nodes for calculating memcmp result only		// Add the loaded values to the phi nodes for calculating memcmp result only
// if result is not used in a zero equality.		// if result is not used in a zero equality.
if (!IsUsedForZeroCmp) {		if (!IsUsedForZeroCmp) {
ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]);		ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]);		ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
}		}

Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);		Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))		BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
? EndBlock		? EndBlock
: LoadCmpBlocks[BlockIndex + 1];		: LoadCmpBlocks[BlockIndex + 1];
// Early exit branch if difference found to ResultBlock. Otherwise, continue		// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.		// to next LoadCmpBlock or EndBlock.
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);		BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);		Builder.Insert(CmpBr);

▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
assert(LoadIndex == getNumLoads() && "some entries were not consumed");		assert(LoadIndex == getNumLoads() && "some entries were not consumed");
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));		return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}		}

/// A memcmp expansion that only has one block of load and compare can bypass		/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.		/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {		Value *MemCmpExpansion::getMemCmpOneBlock() {
Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);		Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
Value *Source1 = CI->getArgOperand(0);		bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
Value *Source2 = CI->getArgOperand(1);

// Cast source to LoadSizeType*.
if (Source1->getType() != LoadSizeType)
Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
if (Source2->getType() != LoadSizeType)
Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());

// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (DL.isLittleEndian() && Size != 1) {
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
}

if (Size < 4) {
// The i8 and i16 cases don't need compares. We zext the loaded values and		// The i8 and i16 cases don't need compares. We zext the loaded values and
// subtract them to get the suitable negative, zero, or positive i32 result.		// subtract them to get the suitable negative, zero, or positive i32 result.
LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());		if (Size < 4) {
LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());		const LoadPair Loads =
return Builder.CreateSub(LoadSrc1, LoadSrc2);		getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
		/*Offset*/ 0);
		return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
}		}

		const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
		/*Offset*/ 0);
// The result of memcmp is negative, zero, or positive, so produce that by		// The result of memcmp is negative, zero, or positive, so produce that by
// subtracting 2 extended compare bits: sub (ugt, ult).		// subtracting 2 extended compare bits: sub (ugt, ult).
// If a target prefers to use selects to get -1/0/1, they should be able		// If a target prefers to use selects to get -1/0/1, they should be able
// to transform this later. The inverse transform (going from selects to math)		// to transform this later. The inverse transform (going from selects to math)
// may not be possible in the DAG because the selects got converted into		// may not be possible in the DAG because the selects got converted into
// branches before we got there.		// branches before we got there.
Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);		Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);		Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());		Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());		Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
return Builder.CreateSub(ZextUGT, ZextULT);		return Builder.CreateSub(ZextUGT, ZextULT);
}		}

// This function expands the memcmp call into an inline expansion and returns		// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.		// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {		Value *MemCmpExpansion::getMemCmpExpansion() {
▲ Show 20 Lines • Show All 275 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

Show First 20 Lines • Show All 84 Lines • ▼ Show 20 Lines	; CHECK-NEXT: blr
%cond = zext i1 %not.lnot to i32		%cond = zext i1 %not.lnot to i32
ret i32 %cond		ret i32 %cond
}		}

; Validate with > 0		; Validate with > 0
define signext i32 @zeroEqualityTest04() {		define signext i32 @zeroEqualityTest04() {
; CHECK-LABEL: zeroEqualityTest04:		; CHECK-LABEL: zeroEqualityTest04:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha		; CHECK-NEXT: b .LBB3_2
Inline comment from courbet (author; unsubmitted, marked done): These are also real changes.
; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha
; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l
; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l
; CHECK-NEXT: ldbrx 3, 0, 6
; CHECK-NEXT: ldbrx 4, 0, 5
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: bne 0, .LBB3_2
; CHECK-NEXT: # %bb.1: # %loadbb1		; CHECK-NEXT: # %bb.1: # %loadbb1
; CHECK-NEXT: li 4, 8		; CHECK-NEXT: li 3, 0
; CHECK-NEXT: ldbrx 3, 6, 4
; CHECK-NEXT: ldbrx 4, 5, 4
; CHECK-NEXT: li 5, 0		; CHECK-NEXT: li 5, 0
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: li 4, 0
; CHECK-NEXT: beq 0, .LBB3_3		; CHECK-NEXT: b .LBB3_4
; CHECK-NEXT: .LBB3_2: # %res_block		; CHECK-NEXT: .LBB3_2:
		; CHECK-NEXT: li 3, 1
		; CHECK-NEXT: li 4, 3
		; CHECK-NEXT: sldi 3, 3, 58
		; CHECK-NEXT: sldi 4, 4, 56
		; CHECK-NEXT: # %bb.3: # %res_block
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: li 3, 1		; CHECK-NEXT: li 3, 1
; CHECK-NEXT: li 4, -1		; CHECK-NEXT: li 4, -1
; CHECK-NEXT: isel 5, 4, 3, 0		; CHECK-NEXT: isel 5, 4, 3, 0
; CHECK-NEXT: .LBB3_3: # %endblock		; CHECK-NEXT: .LBB3_4: # %endblock
; CHECK-NEXT: extsw 3, 5		; CHECK-NEXT: extsw 3, 5
; CHECK-NEXT: neg 3, 3		; CHECK-NEXT: neg 3, 3
; CHECK-NEXT: rldicl 3, 3, 1, 63		; CHECK-NEXT: rldicl 3, 3, 1, 63
; CHECK-NEXT: xori 3, 3, 1		; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr		; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)		%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
%not.cmp = icmp slt i32 %call, 1		%not.cmp = icmp slt i32 %call, 1
%. = zext i1 %not.cmp to i32		%. = zext i1 %not.cmp to i32
ret i32 %.		ret i32 %.
}		}

; Validate with < 0		; Validate with < 0
define signext i32 @zeroEqualityTest05() {		define signext i32 @zeroEqualityTest05() {
; CHECK-LABEL: zeroEqualityTest05:		; CHECK-LABEL: zeroEqualityTest05:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha		; CHECK-NEXT: li 3, 0
; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha		; CHECK-NEXT: li 4, 0
; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l
; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l
; CHECK-NEXT: ldbrx 3, 0, 6
; CHECK-NEXT: ldbrx 4, 0, 5
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: bne 0, .LBB4_2
; CHECK-NEXT: # %bb.1: # %loadbb1		; CHECK-NEXT: # %bb.1: # %loadbb1
; CHECK-NEXT: li 4, 8		; CHECK-NEXT: li 3, 0
; CHECK-NEXT: ldbrx 3, 6, 4		; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: ldbrx 4, 5, 4		; CHECK-NEXT: lis 3, 768
; CHECK-NEXT: li 5, 0		; CHECK-NEXT: lis 4, 1024
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: # %bb.3: # %res_block
; CHECK-NEXT: beq 0, .LBB4_3
; CHECK-NEXT: .LBB4_2: # %res_block
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: li 3, 1		; CHECK-NEXT: li 3, 1
; CHECK-NEXT: li 4, -1		; CHECK-NEXT: li 4, -1
; CHECK-NEXT: isel 5, 4, 3, 0		; CHECK-NEXT: isel 3, 4, 3, 0
; CHECK-NEXT: .LBB4_3: # %endblock		; CHECK-NEXT: # %bb.4: # %endblock
; CHECK-NEXT: nor 3, 5, 5		; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31		; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31
; CHECK-NEXT: blr		; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)		%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)
%call.lobit = lshr i32 %call, 31		%call.lobit = lshr i32 %call, 31
%call.lobit.not = xor i32 %call.lobit, 1		%call.lobit.not = xor i32 %call.lobit, 1
ret i32 %call.lobit.not		ret i32 %call.lobit.not
}		}

▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/memcmpIR.ll

	; RUN: llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s			; RUN: llc -o - -mtriple=powerpc64le-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s
	; RUN: llc -o - -mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s --check-prefix=CHECK-BE			; RUN: llc -o - -mtriple=powerpc64-unknown-gnu-linux -stop-after codegenprepare %s | FileCheck %s --check-prefix=CHECK-BE

	define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {			define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
	entry:			entry:
	; CHECK-LABEL: @test1(			; CHECK-LABEL: @test1(
	; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64*			; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64*
	; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
	; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])			; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
				; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i64*
				; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
	; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])			; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
	; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]			; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
	; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block			; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block

	; CHECK-LABEL: res_block:{{.*}}			; CHECK-LABEL: res_block:{{.*}}
	; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64			; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
	; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1			; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
	; CHECK-NEXT: br label %endblock			; CHECK-NEXT: br label %endblock

	; CHECK-LABEL: loadbb1:{{.*}}			; CHECK-LABEL: loadbb1:{{.*}}
	; CHECK: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*			; CHECK: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
	; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*			; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
	; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8			; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
	; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*			; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
				; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
				; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
	; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8			; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
	; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*			; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
	; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
	; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]			; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
	; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
	; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])			; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
	; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]			; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
	; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block			; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block

	; CHECK-BE-LABEL: @test1(			; CHECK-BE-LABEL: @test1(
	; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*			; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*
				; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i8* {{.*}} to i64*
	; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*			; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
	; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]			; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
	; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block			; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block

	; CHECK-BE-LABEL: res_block:{{.*}}			; CHECK-BE-LABEL: res_block:{{.*}}
	; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64			; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
	; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1			; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
	; CHECK-BE-NEXT: br label %endblock			; CHECK-BE-NEXT: br label %endblock

	; CHECK-BE-LABEL: loadbb1:{{.*}}			; CHECK-BE-LABEL: loadbb1:{{.*}}
	; CHECK-BE: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*			; CHECK-BE: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
	; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*			; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
	; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8			; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
	; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*			; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
				; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
	; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8			; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
	; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*			; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
	; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
	; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]			; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
	; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]			; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
	; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block			; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block

	%0 = bitcast i32* %buffer1 to i8*			%0 = bitcast i32* %buffer1 to i8*
	%1 = bitcast i32* %buffer2 to i8*			%1 = bitcast i32* %buffer2 to i8*
	%call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 16)			%call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 16)
	ret i32 %call			ret i32 %call
	}			}

	declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1			declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1

	define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {			define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
	; CHECK-LABEL: @test2(			; CHECK-LABEL: @test2(
	; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*			; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*
	; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
	; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])			; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
				; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32*
				; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
	; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])			; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
	; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]]			; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]]
	; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]			; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]]
	; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32			; CHECK-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
	; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32			; CHECK-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
	; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]			; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
	; CHECK-NEXT: ret i32 [[SUB]]			; CHECK-NEXT: ret i32 [[SUB]]

	; CHECK-BE-LABEL: @test2(			; CHECK-BE-LABEL: @test2(
	; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*			; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*
				; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32*
	; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*			; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
	; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]			; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]]
	; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]			; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]]
	; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32			; CHECK-BE-NEXT: [[Z1:%[0-9]+]] = zext i1 [[CMP1]] to i32
	; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32			; CHECK-BE-NEXT: [[Z2:%[0-9]+]] = zext i1 [[CMP2]] to i32
	; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]			; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[Z1]], [[Z2]]
	; CHECK-BE-NEXT: ret i32 [[SUB]]			; CHECK-BE-NEXT: ret i32 [[SUB]]

	entry:			entry:
	%0 = bitcast i32* %buffer1 to i8*			%0 = bitcast i32* %buffer1 to i8*
	%1 = bitcast i32* %buffer2 to i8*			%1 = bitcast i32* %buffer2 to i8*
	%call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 4)			%call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 4)
	ret i32 %call			ret i32 %call
	}			}

	define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {			define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) {
	; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64*			; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64*
	; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
	; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])			; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
				; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i64*
				; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
	; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])			; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
	; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]			; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
	; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block			; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block

	; CHECK-LABEL: res_block:{{.*}}			; CHECK-LABEL: res_block:{{.*}}
	; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64			; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
	; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1			; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
	; CHECK-NEXT: br label %endblock			; CHECK-NEXT: br label %endblock

	; CHECK-LABEL: loadbb1:{{.*}}			; CHECK-LABEL: loadbb1:{{.*}}
	; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*			; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32*
	; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
	; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])			; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]])
	; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
	; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64			; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64
				; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 8
				; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32*
				; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
				; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]])
	; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64			; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64
	; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]			; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
	; CHECK-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block			; CHECK-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block

	; CHECK-LABEL: loadbb2:{{.*}}			; CHECK-LABEL: loadbb2:{{.*}}
	; CHECK: [[LOAD1:%[0-9]+]] = load i16, i16*			; CHECK: [[LOAD1:%[0-9]+]] = load i16, i16*
	; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
	; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]])			; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]])
	; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]])
	; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64			; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64
				; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 12
				; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i16*
				; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
				; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]])
	; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64			; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64
	; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]			; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
	; CHECK-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block			; CHECK-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block

	; CHECK-LABEL: loadbb3:{{.*}}			; CHECK-LABEL: loadbb3:{{.*}}
	; CHECK: [[LOAD1:%[0-9]+]] = load i8, i8*			; CHECK: [[LOAD1:%[0-9]+]] = load i8, i8*
	; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
	; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32			; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
				; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 14
				; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
	; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32			; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32
	; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]			; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
	; CHECK-NEXT: br label %endblock			; CHECK-NEXT: br label %endblock

	; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*			; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*
				; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i64*
	; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*			; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
	; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]			; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
	; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block			; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block

	; CHECK-BE-LABEL: res_block:{{.*}}			; CHECK-BE-LABEL: res_block:{{.*}}
	; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64			; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
	; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1			; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
	; CHECK-BE-NEXT: br label %endblock			; CHECK-BE-NEXT: br label %endblock

				; CHECK-BE-LABEL: loadbb1:{{.*}}
	; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*			; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32*
	; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
	; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64			; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64
				; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 8
				; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32*
				; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32*
	; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64			; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64
	; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]			; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
	; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block			; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block

				; CHECK-BE-LABEL: loadbb2:{{.*}}
	; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, i16*			; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, i16*
	; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
	; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64			; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64
				; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 12
				; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i16*
				; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16*
	; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64			; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64
	; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]			; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]]
	; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block			; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block

				; CHECK-BE-LABEL: loadbb3:{{.*}}
	; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, i8*			; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, i8*
	; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
	; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32			; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32
				; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 14
				; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8*
	; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32			; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32
	; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]			; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
	; CHECK-BE-NEXT: br label %endblock			; CHECK-BE-NEXT: br label %endblock

	entry:			entry:
	%0 = bitcast i32* %buffer1 to i8*			%0 = bitcast i32* %buffer1 to i8*
	%1 = bitcast i32* %buffer2 to i8*			%1 = bitcast i32* %buffer2 to i8*
	%call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 15)			%call = tail call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
	Show All 23 Lines

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

Show First 20 Lines • Show All 63 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i32 @length2(i8* %X, i8* %Y) nounwind {		define i32 @length2(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2:		; X86-LABEL: length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2:		; X64-LABEL: length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i1 @length2_eq(i8* %X, i8* %Y) nounwind {		define i1 @length2_eq(i8* %X, i8* %Y) nounwind {
Show All 15 Lines	; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp eq i32 %m, 0		%c = icmp eq i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length2_lt(i8* %X, i8* %Y) nounwind {		define i1 @length2_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_lt:		; X86-LABEL: length2_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_lt:		; X64-LABEL: length2_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length2_gt(i8* %X, i8* %Y) nounwind {		define i1 @length2_gt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_gt:		; X86-LABEL: length2_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: movzwl %cx, %ecx
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: subl %eax, %ecx		; X86-NEXT: subl %eax, %ecx
; X86-NEXT: testl %ecx, %ecx		; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_gt:		; X64-LABEL: length2_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax		; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp sgt i32 %m, 0		%c = icmp sgt i32 %m, 0
ret i1 %c		ret i1 %c
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines

define i32 @length3(i8* %X, i8* %Y) nounwind {		define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length3:		; X86-LABEL: length3:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx		; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx		; X86-NEXT: rolw $8, %dx
		; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %si		; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx		; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB9_1		; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax		; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx		; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB9_1: # %res_block		; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length3:		; X64-LABEL: length3:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
		; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %cx		; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax		; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB9_1		; X64-NEXT: jne .LBB9_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax		; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx		; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 36 Lines
}		}

define i32 @length4(i8* %X, i8* %Y) nounwind {		define i32 @length4(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4:		; X86-LABEL: length4:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4:		; X64-LABEL: length4:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m		ret i32 %m
Show All 21 Lines
}		}

define i1 @length4_lt(i8* %X, i8* %Y) nounwind {		define i1 @length4_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4_lt:		; X86-LABEL: length4_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4_lt:		; X64-LABEL: length4_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length4_gt(i8* %X, i8* %Y) nounwind {		define i1 @length4_gt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4_gt:		; X86-LABEL: length4_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: xorl %edx, %edx		; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %eax, %ecx		; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: seta %dl		; X86-NEXT: seta %dl
; X86-NEXT: sbbl $0, %edx		; X86-NEXT: sbbl $0, %edx
; X86-NEXT: testl %edx, %edx		; X86-NEXT: testl %edx, %edx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4_gt:		; X64-LABEL: length4_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: seta %dl		; X64-NEXT: seta %dl
; X64-NEXT: sbbl $0, %edx		; X64-NEXT: sbbl $0, %edx
; X64-NEXT: testl %edx, %edx		; X64-NEXT: testl %edx, %edx
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
Show All 22 Lines

define i32 @length5(i8* %X, i8* %Y) nounwind {		define i32 @length5(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5:		; X86-LABEL: length5:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB16_1		; X86-NEXT: jne .LBB16_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB16_1: # %res_block		; X86-NEXT: .LBB16_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5:		; X64-LABEL: length5:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB16_1		; X64-NEXT: jne .LBB16_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 37 Lines

define i1 @length5_lt(i8* %X, i8* %Y) nounwind {		define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5_lt:		; X86-LABEL: length5_lt:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB18_1		; X86-NEXT: jne .LBB18_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB18_3		; X86-NEXT: jmp .LBB18_3
; X86-NEXT: .LBB18_1: # %res_block		; X86-NEXT: .LBB18_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB18_3: # %endblock		; X86-NEXT: .LBB18_3: # %endblock
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5_lt:		; X64-LABEL: length5_lt:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB18_1		; X64-NEXT: jne .LBB18_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
Show All 40 Lines

define i32 @length8(i8* %X, i8* %Y) nounwind {		define i32 @length8(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length8:		; X86-LABEL: length8:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB20_2		; X86-NEXT: jne .LBB20_2
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB20_3		; X86-NEXT: je .LBB20_3
; X86-NEXT: .LBB20_2: # %res_block		; X86-NEXT: .LBB20_2: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB20_3: # %endblock		; X86-NEXT: .LBB20_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length8:		; X64-LABEL: length8:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m		ret i32 %m
▲ Show 20 Lines • Show All 180 Lines • ▼ Show 20 Lines

define i32 @length12(i8* %X, i8* %Y) nounwind {		define i32 @length12(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length12:		; X86-LABEL: length12:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB27_3		; X86-NEXT: jne .LBB27_3
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB27_3		; X86-NEXT: jne .LBB27_3
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %ecx		; X86-NEXT: movl 8(%esi), %ecx
; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB27_4		; X86-NEXT: je .LBB27_4
; X86-NEXT: .LBB27_3: # %res_block		; X86-NEXT: .LBB27_3: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB27_4: # %endblock		; X86-NEXT: .LBB27_4: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length12:		; X64-LABEL: length12:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB27_2		; X64-NEXT: jne .LBB27_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx		; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB27_3		; X64-NEXT: je .LBB27_3
; X64-NEXT: .LBB27_2: # %res_block		; X64-NEXT: .LBB27_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines

; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329		; PR33329 - https://bugs.llvm.org/show_bug.cgi?id=33329

define i32 @length16(i8* %X, i8* %Y) nounwind {		define i32 @length16(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length16:		; X86-LABEL: length16:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%edx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl (%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB31_4		; X86-NEXT: jne .LBB31_4
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 4(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB31_4		; X86-NEXT: jne .LBB31_4
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %ecx		; X86-NEXT: movl 8(%edx), %ecx
; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 8(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB31_4		; X86-NEXT: jne .LBB31_4
; X86-NEXT: # %bb.3: # %loadbb3		; X86-NEXT: # %bb.3: # %loadbb3
; X86-NEXT: movl 12(%esi), %ecx		; X86-NEXT: movl 12(%edx), %ecx
; X86-NEXT: movl 12(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 12(%eax), %esi
		; X86-NEXT: bswapl %esi
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: je .LBB31_5		; X86-NEXT: je .LBB31_5
; X86-NEXT: .LBB31_4: # %res_block		; X86-NEXT: .LBB31_4: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB31_5: # %endblock		; X86-NEXT: .LBB31_5: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16:		; X64-LABEL: length16:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB31_2		; X64-NEXT: jne .LBB31_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB31_3		; X64-NEXT: je .LBB31_3
; X64-NEXT: .LBB31_2: # %res_block		; X64-NEXT: .LBB31_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines	; X64-MIC-AVX-NEXT: retq
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @length16_lt(i8* %x, i8* %y) nounwind {		define i1 @length16_lt(i8* %x, i8* %y) nounwind {
; X86-LABEL: length16_lt:		; X86-LABEL: length16_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%edx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl (%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB33_4		; X86-NEXT: jne .LBB33_4
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 4(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB33_4		; X86-NEXT: jne .LBB33_4
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %ecx		; X86-NEXT: movl 8(%edx), %ecx
; X86-NEXT: movl 8(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 8(%eax), %esi
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: jne .LBB33_4		; X86-NEXT: jne .LBB33_4
; X86-NEXT: # %bb.3: # %loadbb3		; X86-NEXT: # %bb.3: # %loadbb3
; X86-NEXT: movl 12(%esi), %ecx		; X86-NEXT: movl 12(%edx), %ecx
; X86-NEXT: movl 12(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
; X86-NEXT: bswapl %edx		; X86-NEXT: movl 12(%eax), %esi
		; X86-NEXT: bswapl %esi
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: je .LBB33_5		; X86-NEXT: je .LBB33_5
; X86-NEXT: .LBB33_4: # %res_block		; X86-NEXT: .LBB33_4: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %esi, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB33_5: # %endblock		; X86-NEXT: .LBB33_5: # %endblock
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_lt:		; X64-LABEL: length16_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB33_2		; X64-NEXT: jne .LBB33_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB33_3		; X64-NEXT: je .LBB33_3
; X64-NEXT: .LBB33_2: # %res_block		; X64-NEXT: .LBB33_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
; X64-NEXT: leal -1(%rax,%rax), %eax		; X64-NEXT: leal -1(%rax,%rax), %eax
; X64-NEXT: .LBB33_3: # %endblock		; X64-NEXT: .LBB33_3: # %endblock
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind		%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
%cmp = icmp slt i32 %call, 0		%cmp = icmp slt i32 %call, 0
ret i1 %cmp		ret i1 %cmp
}		}

define i1 @length16_gt(i8* %x, i8* %y) nounwind {		define i1 @length16_gt(i8* %x, i8* %y) nounwind {
; X86-LABEL: length16_gt:		; X86-LABEL: length16_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx		; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl (%edx), %eax
; X86-NEXT: movl (%esi), %eax
; X86-NEXT: movl (%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: jne .LBB34_4		; X86-NEXT: jne .LBB34_4
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %eax		; X86-NEXT: movl 4(%edx), %eax
; X86-NEXT: movl 4(%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl 4(%ecx), %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: jne .LBB34_4		; X86-NEXT: jne .LBB34_4
; X86-NEXT: # %bb.2: # %loadbb2		; X86-NEXT: # %bb.2: # %loadbb2
; X86-NEXT: movl 8(%esi), %eax		; X86-NEXT: movl 8(%edx), %eax
; X86-NEXT: movl 8(%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl 8(%ecx), %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: bswapl %esi
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: jne .LBB34_4		; X86-NEXT: jne .LBB34_4
; X86-NEXT: # %bb.3: # %loadbb3		; X86-NEXT: # %bb.3: # %loadbb3
; X86-NEXT: movl 12(%esi), %eax		; X86-NEXT: movl 12(%edx), %eax
; X86-NEXT: movl 12(%edx), %ecx
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: movl 12(%ecx), %esi
; X86-NEXT: xorl %edx, %edx		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: xorl %ecx, %ecx
		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: je .LBB34_5		; X86-NEXT: je .LBB34_5
; X86-NEXT: .LBB34_4: # %res_block		; X86-NEXT: .LBB34_4: # %res_block
; X86-NEXT: xorl %edx, %edx		; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: cmpl %ecx, %eax		; X86-NEXT: cmpl %esi, %eax
; X86-NEXT: setae %dl		; X86-NEXT: setae %cl
; X86-NEXT: leal -1(%edx,%edx), %edx		; X86-NEXT: leal -1(%ecx,%ecx), %ecx
; X86-NEXT: .LBB34_5: # %endblock		; X86-NEXT: .LBB34_5: # %endblock
; X86-NEXT: testl %edx, %edx		; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_gt:		; X64-LABEL: length16_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB34_2		; X64-NEXT: jne .LBB34_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB34_3		; X64-NEXT: je .LBB34_3
; X64-NEXT: .LBB34_2: # %res_block		; X64-NEXT: .LBB34_2: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 115 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length24:		; X64-LABEL: length24:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB36_3		; X64-NEXT: jne .LBB36_3
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB36_3		; X64-NEXT: jne .LBB36_3
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB36_4		; X64-NEXT: je .LBB36_4
; X64-NEXT: .LBB36_3: # %res_block		; X64-NEXT: .LBB36_3: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 127 Lines • ▼ Show 20 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length24_lt:		; X64-LABEL: length24_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB38_3		; X64-NEXT: jne .LBB38_3
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB38_3		; X64-NEXT: jne .LBB38_3
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB38_4		; X64-NEXT: je .LBB38_4
; X64-NEXT: .LBB38_3: # %res_block		; X64-NEXT: .LBB38_3: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 18 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length24_gt:		; X64-LABEL: length24_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB39_3		; X64-NEXT: jne .LBB39_3
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB39_3		; X64-NEXT: jne .LBB39_3
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rax		; X64-NEXT: movq 16(%rdi), %rax
; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB39_4		; X64-NEXT: je .LBB39_4
; X64-NEXT: .LBB39_3: # %res_block		; X64-NEXT: .LBB39_3: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 513 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length32:		; X64-LABEL: length32:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB47_4		; X64-NEXT: jne .LBB47_4
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB47_4		; X64-NEXT: jne .LBB47_4
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB47_4		; X64-NEXT: jne .LBB47_4
; X64-NEXT: # %bb.3: # %loadbb3		; X64-NEXT: # %bb.3: # %loadbb3
; X64-NEXT: movq 24(%rdi), %rcx		; X64-NEXT: movq 24(%rdi), %rcx
; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB47_5		; X64-NEXT: je .LBB47_5
; X64-NEXT: .LBB47_4: # %res_block		; X64-NEXT: .LBB47_4: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 141 Lines • ▼ Show 20 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length32_lt:		; X64-LABEL: length32_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB49_4		; X64-NEXT: jne .LBB49_4
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB49_4		; X64-NEXT: jne .LBB49_4
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rcx		; X64-NEXT: movq 16(%rdi), %rcx
; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 16(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB49_4		; X64-NEXT: jne .LBB49_4
; X64-NEXT: # %bb.3: # %loadbb3		; X64-NEXT: # %bb.3: # %loadbb3
; X64-NEXT: movq 24(%rdi), %rcx		; X64-NEXT: movq 24(%rdi), %rcx
; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 24(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB49_5		; X64-NEXT: je .LBB49_5
; X64-NEXT: .LBB49_4: # %res_block		; X64-NEXT: .LBB49_4: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 18 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length32_gt:		; X64-LABEL: length32_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB50_4		; X64-NEXT: jne .LBB50_4
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB50_4		; X64-NEXT: jne .LBB50_4
; X64-NEXT: # %bb.2: # %loadbb2		; X64-NEXT: # %bb.2: # %loadbb2
; X64-NEXT: movq 16(%rdi), %rax		; X64-NEXT: movq 16(%rdi), %rax
; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 16(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB50_4		; X64-NEXT: jne .LBB50_4
; X64-NEXT: # %bb.3: # %loadbb3		; X64-NEXT: # %bb.3: # %loadbb3
; X64-NEXT: movq 24(%rdi), %rax		; X64-NEXT: movq 24(%rdi), %rax
; X64-NEXT: movq 24(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 24(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB50_5		; X64-NEXT: je .LBB50_5
; X64-NEXT: .LBB50_4: # %res_block		; X64-NEXT: .LBB50_4: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 3,784 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/memcmp-optsize.ll

Show All 10 Lines
@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1		@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8, i8, i64)		declare i32 @memcmp(i8, i8, i64)
declare i32 @bcmp(i8, i8, i64)		declare i32 @bcmp(i8, i8, i64)

define i32 @length2(i8* %X, i8* %Y) nounwind optsize {		define i32 @length2(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length2:		; X86-LABEL: length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2:		; X64-LABEL: length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i1 @length2_eq(i8* %X, i8* %Y) nounwind optsize {		define i1 @length2_eq(i8* %X, i8* %Y) nounwind optsize {
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines

define i32 @length3(i8* %X, i8* %Y) nounwind optsize {		define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length3:		; X86-LABEL: length3:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx		; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx		; X86-NEXT: rolw $8, %dx
		; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %si		; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx		; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1		; X86-NEXT: jne .LBB4_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax		; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx		; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3		; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block		; X86-NEXT: .LBB4_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_3: # %endblock		; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length3:		; X64-LABEL: length3:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
		; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %cx		; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax		; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1		; X64-NEXT: jne .LBB4_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax		; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx		; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 36 Lines
}		}

define i32 @length4(i8* %X, i8* %Y) nounwind optsize {		define i32 @length4(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length4:		; X86-LABEL: length4:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4:		; X64-LABEL: length4:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m		ret i32 %m
Show All 40 Lines

define i32 @length5(i8* %X, i8* %Y) nounwind optsize {		define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length5:		; X86-LABEL: length5:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB9_1		; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3		; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block		; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_3: # %endblock		; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5:		; X64-LABEL: length5:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1		; X64-NEXT: jne .LBB9_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 37 Lines

define i32 @length8(i8* %X, i8* %Y) nounwind optsize {		define i32 @length8(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: length8:		; X86-LABEL: length8:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB11_2		; X86-NEXT: jne .LBB11_2
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3		; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_2: # %res_block		; X86-NEXT: .LBB11_2: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock		; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length8:		; X64-LABEL: length8:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m		ret i32 %m
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length12:		; X64-LABEL: length12:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_2		; X64-NEXT: jne .LBB15_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx		; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3		; X64-NEXT: je .LBB15_3
; X64-NEXT: .LBB15_2: # %res_block		; X64-NEXT: .LBB15_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 15 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16:		; X64-LABEL: length16:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_2		; X64-NEXT: jne .LBB16_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB16_3		; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block		; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 494 Lines • ▼ Show 20 Lines	; X64-AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
%c = icmp eq i32 %m, 0		%c = icmp eq i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind optsize {		define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind optsize {
; X86-LABEL: bcmp_length2:		; X86-LABEL: bcmp_length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bcmp_length2:		; X64-LABEL: bcmp_length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

llvm/test/CodeGen/X86/memcmp-pgso.ll

Show All 10 Lines
@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1		@.str = private constant [65 x i8] c"0123456789012345678901234567890123456789012345678901234567890123\00", align 1

declare i32 @memcmp(i8, i8, i64)		declare i32 @memcmp(i8, i8, i64)
declare i32 @bcmp(i8, i8, i64)		declare i32 @bcmp(i8, i8, i64)

define i32 @length2(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length2(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length2:		; X86-LABEL: length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2:		; X64-LABEL: length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i1 @length2_eq(i8* %X, i8* %Y) nounwind !prof !14 {		define i1 @length2_eq(i8* %X, i8* %Y) nounwind !prof !14 {
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines

define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length3:		; X86-LABEL: length3:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx		; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx		; X86-NEXT: rolw $8, %dx
		; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %si		; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx		; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB4_1		; X86-NEXT: jne .LBB4_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax		; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx		; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB4_3		; X86-NEXT: jmp .LBB4_3
; X86-NEXT: .LBB4_1: # %res_block		; X86-NEXT: .LBB4_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB4_3: # %endblock		; X86-NEXT: .LBB4_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length3:		; X64-LABEL: length3:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
		; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %cx		; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax		; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB4_1		; X64-NEXT: jne .LBB4_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax		; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx		; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 36 Lines
}		}

define i32 @length4(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length4(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length4:		; X86-LABEL: length4:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4:		; X64-LABEL: length4:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m		ret i32 %m
Show All 40 Lines

define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length5:		; X86-LABEL: length5:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB9_1		; X86-NEXT: jne .LBB9_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB9_3		; X86-NEXT: jmp .LBB9_3
; X86-NEXT: .LBB9_1: # %res_block		; X86-NEXT: .LBB9_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB9_3: # %endblock		; X86-NEXT: .LBB9_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5:		; X64-LABEL: length5:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB9_1		; X64-NEXT: jne .LBB9_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 37 Lines

define i32 @length8(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @length8(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: length8:		; X86-LABEL: length8:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB11_2		; X86-NEXT: jne .LBB11_2
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB11_3		; X86-NEXT: je .LBB11_3
; X86-NEXT: .LBB11_2: # %res_block		; X86-NEXT: .LBB11_2: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB11_3: # %endblock		; X86-NEXT: .LBB11_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length8:		; X64-LABEL: length8:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m		ret i32 %m
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length12:		; X64-LABEL: length12:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB15_2		; X64-NEXT: jne .LBB15_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx		; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB15_3		; X64-NEXT: je .LBB15_3
; X64-NEXT: .LBB15_2: # %res_block		; X64-NEXT: .LBB15_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 15 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16:		; X64-LABEL: length16:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB16_2		; X64-NEXT: jne .LBB16_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB16_3		; X64-NEXT: je .LBB16_3
; X64-NEXT: .LBB16_2: # %res_block		; X64-NEXT: .LBB16_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 494 Lines • ▼ Show 20 Lines	; X64-AVX2-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([65 x i8], [65 x i8]* @.str, i32 0, i32 0), i64 64) nounwind
%c = icmp eq i32 %m, 0		%c = icmp eq i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {		define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 {
; X86-LABEL: bcmp_length2:		; X86-LABEL: bcmp_length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: bcmp_length2:		; X64-LABEL: bcmp_length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @bcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

!llvm.module.flags = !{!0}		!llvm.module.flags = !{!0}
Show All 15 Lines

llvm/test/CodeGen/X86/memcmp.ll

Show First 20 Lines • Show All 62 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 0) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i32 @length2(i8* %X, i8* %Y) nounwind {		define i32 @length2(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2:		; X86-LABEL: length2:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2:		; X64-LABEL: length2:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i32 @length2_const(i8* %X, i8* %Y) nounwind {		define i32 @length2_const(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_const:		; X86-LABEL: length2_const:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax		; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl .L.str+1, %ecx
; X86-NEXT: rolw $8, %ax		; X86-NEXT: rolw $8, %ax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
		[Inline review comment — courbet (author), marked Done] This is the real change.
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_const:		; X64-LABEL: length2_const:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i1 @length2_gt_const(i8* %X, i8* %Y) nounwind {		define i1 @length2_gt_const(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_gt_const:		; X86-LABEL: length2_gt_const:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax		; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl .L.str+1, %ecx
; X86-NEXT: rolw $8, %ax		; X86-NEXT: rolw $8, %ax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_gt_const:		; X64-LABEL: length2_gt_const:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax		; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
%c = icmp sgt i32 %m, 0		%c = icmp sgt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

Show All 16 Lines	; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp eq i32 %m, 0		%c = icmp eq i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length2_lt(i8* %X, i8* %Y) nounwind {		define i1 @length2_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_lt:		; X86-LABEL: length2_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %dx		; X86-NEXT: movzwl %cx, %ecx
; X86-NEXT: movzwl %cx, %eax
; X86-NEXT: movzwl %dx, %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_lt:		; X64-LABEL: length2_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length2_gt(i8* %X, i8* %Y) nounwind {		define i1 @length2_gt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_gt:		; X86-LABEL: length2_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl (%ecx), %ecx		; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: rolw $8, %cx		; X86-NEXT: rolw $8, %cx
; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: movzwl %cx, %ecx
		; X86-NEXT: movzwl (%eax), %eax
		; X86-NEXT: rolw $8, %ax
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: subl %eax, %ecx		; X86-NEXT: subl %eax, %ecx
; X86-NEXT: testl %ecx, %ecx		; X86-NEXT: testl %ecx, %ecx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_gt:		; X64-LABEL: length2_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
		; X64-NEXT: movzwl (%rsi), %ecx
		; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: movzwl %cx, %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax		; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
%c = icmp sgt i32 %m, 0		%c = icmp sgt i32 %m, 0
ret i1 %c		ret i1 %c
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines

define i32 @length3(i8* %X, i8* %Y) nounwind {		define i32 @length3(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length3:		; X86-LABEL: length3:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %edx		; X86-NEXT: movzwl (%eax), %edx
; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %dx		; X86-NEXT: rolw $8, %dx
		; X86-NEXT: movzwl (%ecx), %esi
; X86-NEXT: rolw $8, %si		; X86-NEXT: rolw $8, %si
; X86-NEXT: cmpw %si, %dx		; X86-NEXT: cmpw %si, %dx
; X86-NEXT: jne .LBB11_1		; X86-NEXT: jne .LBB11_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 2(%eax), %eax		; X86-NEXT: movzbl 2(%eax), %eax
; X86-NEXT: movzbl 2(%ecx), %ecx		; X86-NEXT: movzbl 2(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB11_1: # %res_block		; X86-NEXT: .LBB11_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length3:		; X64-LABEL: length3:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
		; X64-NEXT: movzwl (%rsi), %ecx
; X64-NEXT: rolw $8, %cx		; X64-NEXT: rolw $8, %cx
; X64-NEXT: cmpw %cx, %ax		; X64-NEXT: cmpw %cx, %ax
; X64-NEXT: jne .LBB11_1		; X64-NEXT: jne .LBB11_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 2(%rdi), %eax		; X64-NEXT: movzbl 2(%rdi), %eax
; X64-NEXT: movzbl 2(%rsi), %ecx		; X64-NEXT: movzbl 2(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 36 Lines
}		}

define i32 @length4(i8* %X, i8* %Y) nounwind {		define i32 @length4(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4:		; X86-LABEL: length4:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4:		; X64-LABEL: length4:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
ret i32 %m		ret i32 %m
Show All 21 Lines
}		}

define i1 @length4_lt(i8* %X, i8* %Y) nounwind {		define i1 @length4_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4_lt:		; X86-LABEL: length4_lt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: seta %al		; X86-NEXT: seta %al
; X86-NEXT: sbbl $0, %eax		; X86-NEXT: sbbl $0, %eax
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4_lt:		; X64-LABEL: length4_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl (%rsi), %edx
		[Inline review comment — efriedma, marked Not Done] The scheduling here seems to be worse?
		[Inline review comment — courbet (author), marked Done] I think we could argue either way: the new scheduling interleaves loads and other computations, evening out the port pressure and increasing compute parallelism. On the other hand, it's true that there is less data parallelism. It should not matter with recent out-of-order cores anyway. If you feel strongly about this, I can go back to interleaving the data; this is actually orthogonal to the real change.
		[Inline review comment — efriedma, marked Not Done] I'd prefer to avoid unrelated changes. Regarding "the new scheduling interleaves loads and other computations": scheduling an arithmetic operation that uses a loaded value immediately after the load is never going to work out; loads have latency. Granted, I agree it's unlikely to matter much on a modern x86 core. Really, I'm more surprised we aren't trying to do any scheduling at all after isel.
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
%c = icmp slt i32 %m, 0		%c = icmp slt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

define i1 @length4_gt(i8* %X, i8* %Y) nounwind {		define i1 @length4_gt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length4_gt:		; X86-LABEL: length4_gt:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl (%ecx), %ecx		; X86-NEXT: movl (%ecx), %ecx
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %eax
; X86-NEXT: bswapl %eax		; X86-NEXT: bswapl %eax
; X86-NEXT: xorl %edx, %edx		; X86-NEXT: xorl %edx, %edx
; X86-NEXT: cmpl %eax, %ecx		; X86-NEXT: cmpl %eax, %ecx
; X86-NEXT: seta %dl		; X86-NEXT: seta %dl
; X86-NEXT: sbbl $0, %edx		; X86-NEXT: sbbl $0, %edx
; X86-NEXT: testl %edx, %edx		; X86-NEXT: testl %edx, %edx
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4_gt:		; X64-LABEL: length4_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: seta %dl		; X64-NEXT: seta %dl
; X64-NEXT: sbbl $0, %edx		; X64-NEXT: sbbl $0, %edx
; X64-NEXT: testl %edx, %edx		; X64-NEXT: testl %edx, %edx
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
Show All 22 Lines

define i32 @length5(i8* %X, i8* %Y) nounwind {		define i32 @length5(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5:		; X86-LABEL: length5:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB18_1		; X86-NEXT: jne .LBB18_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
; X86-NEXT: .LBB18_1: # %res_block		; X86-NEXT: .LBB18_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5:		; X64-LABEL: length5:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB18_1		; X64-NEXT: jne .LBB18_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
Show All 37 Lines

define i1 @length5_lt(i8* %X, i8* %Y) nounwind {		define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length5_lt:		; X86-LABEL: length5_lt:
; X86: # %bb.0: # %loadbb		; X86: # %bb.0: # %loadbb
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %edx		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
		; X86-NEXT: movl (%ecx), %esi
; X86-NEXT: bswapl %esi		; X86-NEXT: bswapl %esi
; X86-NEXT: cmpl %esi, %edx		; X86-NEXT: cmpl %esi, %edx
; X86-NEXT: jne .LBB20_1		; X86-NEXT: jne .LBB20_1
; X86-NEXT: # %bb.2: # %loadbb1		; X86-NEXT: # %bb.2: # %loadbb1
; X86-NEXT: movzbl 4(%eax), %eax		; X86-NEXT: movzbl 4(%eax), %eax
; X86-NEXT: movzbl 4(%ecx), %ecx		; X86-NEXT: movzbl 4(%ecx), %ecx
; X86-NEXT: subl %ecx, %eax		; X86-NEXT: subl %ecx, %eax
; X86-NEXT: jmp .LBB20_3		; X86-NEXT: jmp .LBB20_3
; X86-NEXT: .LBB20_1: # %res_block		; X86-NEXT: .LBB20_1: # %res_block
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: movzbl %al, %eax		; X86-NEXT: movzbl %al, %eax
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB20_3: # %endblock		; X86-NEXT: .LBB20_3: # %endblock
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length5_lt:		; X64-LABEL: length5_lt:
; X64: # %bb.0: # %loadbb		; X64: # %bb.0: # %loadbb
; X64-NEXT: movl (%rdi), %eax		; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %eax		; X64-NEXT: bswapl %eax
		; X64-NEXT: movl (%rsi), %ecx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: cmpl %ecx, %eax		; X64-NEXT: cmpl %ecx, %eax
; X64-NEXT: jne .LBB20_1		; X64-NEXT: jne .LBB20_1
; X64-NEXT: # %bb.2: # %loadbb1		; X64-NEXT: # %bb.2: # %loadbb1
; X64-NEXT: movzbl 4(%rdi), %eax		; X64-NEXT: movzbl 4(%rdi), %eax
; X64-NEXT: movzbl 4(%rsi), %ecx		; X64-NEXT: movzbl 4(%rsi), %ecx
; X64-NEXT: subl %ecx, %eax		; X64-NEXT: subl %ecx, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
Show All 40 Lines

define i32 @length8(i8* %X, i8* %Y) nounwind {		define i32 @length8(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length8:		; X86-LABEL: length8:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: pushl %esi		; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi		; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl (%esi), %ecx		; X86-NEXT: movl (%esi), %ecx
; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl (%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: jne .LBB22_2		; X86-NEXT: jne .LBB22_2
; X86-NEXT: # %bb.1: # %loadbb1		; X86-NEXT: # %bb.1: # %loadbb1
; X86-NEXT: movl 4(%esi), %ecx		; X86-NEXT: movl 4(%esi), %ecx
; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %ecx		; X86-NEXT: bswapl %ecx
		; X86-NEXT: movl 4(%eax), %edx
; X86-NEXT: bswapl %edx		; X86-NEXT: bswapl %edx
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: je .LBB22_3		; X86-NEXT: je .LBB22_3
; X86-NEXT: .LBB22_2: # %res_block		; X86-NEXT: .LBB22_2: # %res_block
; X86-NEXT: xorl %eax, %eax		; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx		; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: setae %al		; X86-NEXT: setae %al
; X86-NEXT: leal -1(%eax,%eax), %eax		; X86-NEXT: leal -1(%eax,%eax), %eax
; X86-NEXT: .LBB22_3: # %endblock		; X86-NEXT: .LBB22_3: # %endblock
; X86-NEXT: popl %esi		; X86-NEXT: popl %esi
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length8:		; X64-LABEL: length8:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind
ret i32 %m		ret i32 %m
▲ Show 20 Lines • Show All 165 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length12:		; X64-LABEL: length12:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB29_2		; X64-NEXT: jne .LBB29_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movl 8(%rdi), %ecx		; X64-NEXT: movl 8(%rdi), %ecx
; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
		; X64-NEXT: movl 8(%rsi), %edx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB29_3		; X64-NEXT: je .LBB29_3
; X64-NEXT: .LBB29_2: # %res_block		; X64-NEXT: .LBB29_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 144 Lines • ▼ Show 20 Lines
; X86-NEXT: pushl {{[0-9]+}}(%esp)		; X86-NEXT: pushl {{[0-9]+}}(%esp)
; X86-NEXT: calll memcmp		; X86-NEXT: calll memcmp
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16:		; X64-LABEL: length16:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB35_2		; X64-NEXT: jne .LBB35_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB35_3		; X64-NEXT: je .LBB35_3
; X64-NEXT: .LBB35_2: # %res_block		; X64-NEXT: .LBB35_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: shrl $31, %eax		; X86-NEXT: shrl $31, %eax
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_lt:		; X64-LABEL: length16_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rcx		; X64-NEXT: movq (%rdi), %rcx
; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq (%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: jne .LBB37_2		; X64-NEXT: jne .LBB37_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rcx		; X64-NEXT: movq 8(%rdi), %rcx
; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
		; X64-NEXT: movq 8(%rsi), %rdx
; X64-NEXT: bswapq %rdx		; X64-NEXT: bswapq %rdx
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: je .LBB37_3		; X64-NEXT: je .LBB37_3
; X64-NEXT: .LBB37_2: # %res_block		; X64-NEXT: .LBB37_2: # %res_block
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpq %rdx, %rcx		; X64-NEXT: cmpq %rdx, %rcx
; X64-NEXT: setae %al		; X64-NEXT: setae %al
Show All 18 Lines
; X86-NEXT: addl $16, %esp		; X86-NEXT: addl $16, %esp
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length16_gt:		; X64-LABEL: length16_gt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movq (%rdi), %rax		; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq (%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: jne .LBB38_2		; X64-NEXT: jne .LBB38_2
; X64-NEXT: # %bb.1: # %loadbb1		; X64-NEXT: # %bb.1: # %loadbb1
; X64-NEXT: movq 8(%rdi), %rax		; X64-NEXT: movq 8(%rdi), %rax
; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rax		; X64-NEXT: bswapq %rax
		; X64-NEXT: movq 8(%rsi), %rcx
; X64-NEXT: bswapq %rcx		; X64-NEXT: bswapq %rcx
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: je .LBB38_3		; X64-NEXT: je .LBB38_3
; X64-NEXT: .LBB38_2: # %res_block		; X64-NEXT: .LBB38_2: # %res_block
; X64-NEXT: xorl %edx, %edx		; X64-NEXT: xorl %edx, %edx
; X64-NEXT: cmpq %rcx, %rax		; X64-NEXT: cmpq %rcx, %rax
; X64-NEXT: setae %dl		; X64-NEXT: setae %dl
▲ Show 20 Lines • Show All 3,566 Lines • Show Last 20 Lines

llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -S -expandmemcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s \| FileCheck %s --check-prefix=ALL --check-prefix=X32			; RUN: opt -S -expandmemcmp -mtriple=i686-unknown-unknown -data-layout=e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128 < %s \| FileCheck %s --check-prefix=ALL --check-prefix=X32
	; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s \| FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_1LD			; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=1 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s \| FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_1LD
	; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s \| FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_2LD			; RUN: opt -S -expandmemcmp -memcmp-num-loads-per-block=2 -mtriple=x86_64-unknown-unknown -data-layout=e-m:o-i64:64-f80:128-n8:16:32:64-S128 < %s \| FileCheck %s --check-prefix=ALL --check-prefix=X64 --check-prefix=X64_2LD

	declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)			declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)

	define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp2(			; ALL-LABEL: @cmp2(
	; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16			; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16
	; ALL-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i16			; ALL-NEXT: [[TMP2:%.]] = load i16, i16 [[TMP1]]
	; ALL-NEXT: [[TMP3:%.]] = load i16, i16 [[TMP1]]			; ALL-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
	; ALL-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP2]]			; ALL-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
	; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])			; ALL-NEXT: [[TMP5:%.]] = bitcast i8 [[Y:%.]] to i16
	; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])			; ALL-NEXT: [[TMP6:%.]] = load i16, i16 [[TMP5]]
	; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32			; ALL-NEXT: [[TMP7:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
	; ALL-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32			; ALL-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
	; ALL-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]			; ALL-NEXT: [[TMP9:%.*]] = sub i32 [[TMP4]], [[TMP8]]
	; ALL-NEXT: ret i32 [[TMP9]]			; ALL-NEXT: ret i32 [[TMP9]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp3(			; ALL-LABEL: @cmp3(
	; ALL-NEXT: br label [[LOADBB:%.*]]			; ALL-NEXT: br label [[LOADBB:%.*]]
	; ALL: res_block:			; ALL: res_block:
	; ALL-NEXT: [[PHI_SRC1:%.]] = phi i16 [ [[TMP7:%.]], [[LOADBB]] ]			; ALL-NEXT: [[PHI_SRC1:%.]] = phi i16 [ [[TMP5:%.]], [[LOADBB]] ]
	; ALL-NEXT: [[PHI_SRC2:%.]] = phi i16 [ [[TMP8:%.]], [[LOADBB]] ]			; ALL-NEXT: [[PHI_SRC2:%.]] = phi i16 [ [[TMP8:%.]], [[LOADBB]] ]
	; ALL-NEXT: [[TMP1:%.*]] = icmp ult i16 [[PHI_SRC1]], [[PHI_SRC2]]			; ALL-NEXT: [[TMP1:%.*]] = icmp ult i16 [[PHI_SRC1]], [[PHI_SRC2]]
	; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; ALL-NEXT: br label [[ENDBLOCK:%.*]]			; ALL-NEXT: br label [[ENDBLOCK:%.*]]
	; ALL: loadbb:			; ALL: loadbb:
	; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i16			; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i16
	; ALL-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i16			; ALL-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP3]]
	; ALL-NEXT: [[TMP5:%.]] = load i16, i16 [[TMP3]]			; ALL-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
	; ALL-NEXT: [[TMP6:%.]] = load i16, i16 [[TMP4]]			; ALL-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i16
	; ALL-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])			; ALL-NEXT: [[TMP7:%.]] = load i16, i16 [[TMP6]]
	; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])			; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP7]])
	; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]			; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP5]], [[TMP8]]
	; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.]], label [[RES_BLOCK:%.]]			; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.]], label [[RES_BLOCK:%.]]
	; ALL: loadbb1:			; ALL: loadbb1:
	; ALL-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 2			; ALL-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 2
	; ALL-NEXT: [[TMP11:%.]] = getelementptr i8, i8 [[Y]], i64 2			; ALL-NEXT: [[TMP11:%.]] = load i8, i8 [[TMP10]]
	; ALL-NEXT: [[TMP12:%.]] = load i8, i8 [[TMP10]]			; ALL-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
	; ALL-NEXT: [[TMP13:%.]] = load i8, i8 [[TMP11]]			; ALL-NEXT: [[TMP13:%.]] = getelementptr i8, i8 [[Y]], i64 2
	; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32			; ALL-NEXT: [[TMP14:%.]] = load i8, i8 [[TMP13]]
	; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32			; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
	; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]			; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP12]], [[TMP15]]
	; ALL-NEXT: br label [[ENDBLOCK]]			; ALL-NEXT: br label [[ENDBLOCK]]
	; ALL: endblock:			; ALL: endblock:
	; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; ALL-NEXT: ret i32 [[PHI_RES]]			; ALL-NEXT: ret i32 [[PHI_RES]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp4(			; ALL-LABEL: @cmp4(
	; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; ALL-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; ALL-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; ALL-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; ALL-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
	; ALL-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; ALL-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i32
	; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])			; ALL-NEXT: [[TMP5:%.]] = load i32, i32 [[TMP4]]
	; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])			; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
	; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]			; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP6]]
	; ALL-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]			; ALL-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP3]], [[TMP6]]
	; ALL-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32			; ALL-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
	; ALL-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32			; ALL-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
	; ALL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]			; ALL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
	; ALL-NEXT: ret i32 [[TMP11]]			; ALL-NEXT: ret i32 [[TMP11]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp5(			; ALL-LABEL: @cmp5(
	; ALL-NEXT: br label [[LOADBB:%.*]]			; ALL-NEXT: br label [[LOADBB:%.*]]
	; ALL: res_block:			; ALL: res_block:
	; ALL-NEXT: [[PHI_SRC1:%.]] = phi i32 [ [[TMP7:%.]], [[LOADBB]] ]			; ALL-NEXT: [[PHI_SRC1:%.]] = phi i32 [ [[TMP5:%.]], [[LOADBB]] ]
	; ALL-NEXT: [[PHI_SRC2:%.]] = phi i32 [ [[TMP8:%.]], [[LOADBB]] ]			; ALL-NEXT: [[PHI_SRC2:%.]] = phi i32 [ [[TMP8:%.]], [[LOADBB]] ]
	; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]			; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
	; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; ALL-NEXT: br label [[ENDBLOCK:%.*]]			; ALL-NEXT: br label [[ENDBLOCK:%.*]]
	; ALL: loadbb:			; ALL: loadbb:
	; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i32			; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i32
	; ALL-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i32			; ALL-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; ALL-NEXT: [[TMP5:%.]] = load i32, i32 [[TMP3]]			; ALL-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
	; ALL-NEXT: [[TMP6:%.]] = load i32, i32 [[TMP4]]			; ALL-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i32
	; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])			; ALL-NEXT: [[TMP7:%.]] = load i32, i32 [[TMP6]]
	; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])			; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP7]])
	; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]			; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], [[TMP8]]
	; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.]], label [[RES_BLOCK:%.]]			; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.]], label [[RES_BLOCK:%.]]
	; ALL: loadbb1:			; ALL: loadbb1:
	; ALL-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 4			; ALL-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 4
	; ALL-NEXT: [[TMP11:%.]] = getelementptr i8, i8 [[Y]], i64 4			; ALL-NEXT: [[TMP11:%.]] = load i8, i8 [[TMP10]]
	; ALL-NEXT: [[TMP12:%.]] = load i8, i8 [[TMP10]]			; ALL-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
	; ALL-NEXT: [[TMP13:%.]] = load i8, i8 [[TMP11]]			; ALL-NEXT: [[TMP13:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32			; ALL-NEXT: [[TMP14:%.]] = load i8, i8 [[TMP13]]
	; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32			; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
	; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]			; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP12]], [[TMP15]]
	; ALL-NEXT: br label [[ENDBLOCK]]			; ALL-NEXT: br label [[ENDBLOCK]]
	; ALL: endblock:			; ALL: endblock:
	; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; ALL-NEXT: ret i32 [[PHI_RES]]			; ALL-NEXT: ret i32 [[PHI_RES]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp6(			; ALL-LABEL: @cmp6(
	; ALL-NEXT: br label [[LOADBB:%.*]]			; ALL-NEXT: br label [[LOADBB:%.*]]
	; ALL: res_block:			; ALL: res_block:
	; ALL-NEXT: [[PHI_SRC1:%.]] = phi i32 [ [[TMP7:%.]], [[LOADBB]] ], [ [[TMP18:%.]], [[LOADBB1:%.]] ]			; ALL-NEXT: [[PHI_SRC1:%.]] = phi i32 [ [[TMP5:%.]], [[LOADBB]] ], [ [[TMP14:%.]], [[LOADBB1:%.]] ]
	; ALL-NEXT: [[PHI_SRC2:%.]] = phi i32 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]			; ALL-NEXT: [[PHI_SRC2:%.]] = phi i32 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
	; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]			; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
	; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; ALL-NEXT: br label [[ENDBLOCK:%.*]]			; ALL-NEXT: br label [[ENDBLOCK:%.*]]
	; ALL: loadbb:			; ALL: loadbb:
	; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i32			; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i32
	; ALL-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i32			; ALL-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; ALL-NEXT: [[TMP5:%.]] = load i32, i32 [[TMP3]]			; ALL-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
	; ALL-NEXT: [[TMP6:%.]] = load i32, i32 [[TMP4]]			; ALL-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i32
	; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])			; ALL-NEXT: [[TMP7:%.]] = load i32, i32 [[TMP6]]
	; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])			; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP7]])
	; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]			; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], [[TMP8]]
	; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; ALL: loadbb1:			; ALL: loadbb1:
	; ALL-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 4			; ALL-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 4
	; ALL-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i16*			; ALL-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i16*
	; ALL-NEXT: [[TMP12:%.]] = getelementptr i8, i8 [[Y]], i64 4			; ALL-NEXT: [[TMP12:%.]] = load i16, i16 [[TMP11]]
	; ALL-NEXT: [[TMP13:%.]] = bitcast i8 [[TMP12]] to i16*			; ALL-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP12]])
	; ALL-NEXT: [[TMP14:%.]] = load i16, i16 [[TMP11]]			; ALL-NEXT: [[TMP14]] = zext i16 [[TMP13]] to i32
	; ALL-NEXT: [[TMP15:%.]] = load i16, i16 [[TMP13]]			; ALL-NEXT: [[TMP15:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])			; ALL-NEXT: [[TMP16:%.]] = bitcast i8 [[TMP15]] to i16*
	; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])			; ALL-NEXT: [[TMP17:%.]] = load i16, i16 [[TMP16]]
	; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32			; ALL-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP17]])
	; ALL-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32			; ALL-NEXT: [[TMP19]] = zext i16 [[TMP18]] to i32
	; ALL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]			; ALL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP14]], [[TMP19]]
	; ALL-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; ALL-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; ALL: endblock:			; ALL: endblock:
	; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; ALL-NEXT: ret i32 [[PHI_RES]]			; ALL-NEXT: ret i32 [[PHI_RES]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp7(			; ALL-LABEL: @cmp7(
	; ALL-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 7)			; ALL-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 7)
	; ALL-NEXT: ret i32 [[CALL]]			; ALL-NEXT: ret i32 [[CALL]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp8(			; X32-LABEL: @cmp8(
	; X32-NEXT: br label [[LOADBB:%.*]]			; X32-NEXT: br label [[LOADBB:%.*]]
	; X32: res_block:			; X32: res_block:
	; X32-NEXT: [[PHI_SRC1:%.]] = phi i32 [ [[TMP7:%.]], [[LOADBB]] ], [ [[TMP16:%.]], [[LOADBB1:%.]] ]			; X32-NEXT: [[PHI_SRC1:%.]] = phi i32 [ [[TMP5:%.]], [[LOADBB]] ], [ [[TMP13:%.]], [[LOADBB1:%.]] ]
	; X32-NEXT: [[PHI_SRC2:%.]] = phi i32 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]			; X32-NEXT: [[PHI_SRC2:%.]] = phi i32 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
	; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]			; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
	; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; X32-NEXT: br label [[ENDBLOCK:%.*]]			; X32-NEXT: br label [[ENDBLOCK:%.*]]
	; X32: loadbb:			; X32: loadbb:
	; X32-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i32			; X32-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i32
	; X32-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i32			; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X32-NEXT: [[TMP5:%.]] = load i32, i32 [[TMP3]]			; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
	; X32-NEXT: [[TMP6:%.]] = load i32, i32 [[TMP4]]			; X32-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i32
	; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])			; X32-NEXT: [[TMP7:%.]] = load i32, i32 [[TMP6]]
	; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])			; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP7]])
	; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]			; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], [[TMP8]]
	; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X32: loadbb1:			; X32: loadbb1:
	; X32-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 4			; X32-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X32-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i32*			; X32-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i32*
	; X32-NEXT: [[TMP12:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X32-NEXT: [[TMP12:%.]] = load i32, i32 [[TMP11]]
	; X32-NEXT: [[TMP13:%.]] = bitcast i8 [[TMP12]] to i32*			; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP12]])
	; X32-NEXT: [[TMP14:%.]] = load i32, i32 [[TMP11]]			; X32-NEXT: [[TMP14:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X32-NEXT: [[TMP15:%.]] = load i32, i32 [[TMP13]]			; X32-NEXT: [[TMP15:%.]] = bitcast i8 [[TMP14]] to i32*
	; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])			; X32-NEXT: [[TMP16:%.]] = load i32, i32 [[TMP15]]
	; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])			; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP16]])
	; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]			; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP13]], [[TMP17]]
	; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X32: endblock:			; X32: endblock:
	; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; X32-NEXT: ret i32 [[PHI_RES]]			; X32-NEXT: ret i32 [[PHI_RES]]
	;			;
	; X64-LABEL: @cmp8(			; X64-LABEL: @cmp8(
	; X64-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
	; X64-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])			; X64-NEXT: [[TMP5:%.]] = load i64, i64 [[TMP4]]
	; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])			; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
	; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]			; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP3]], [[TMP6]]
	; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]			; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP3]], [[TMP6]]
	; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32			; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
	; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32			; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
	; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]			; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
	; X64-NEXT: ret i32 [[TMP11]]			; X64-NEXT: ret i32 [[TMP11]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp9(			; X32-LABEL: @cmp9(
	; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 9)			; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 9)
	; X32-NEXT: ret i32 [[CALL]]			; X32-NEXT: ret i32 [[CALL]]
	;			;
	; X64-LABEL: @cmp9(			; X64-LABEL: @cmp9(
	; X64-NEXT: br label [[LOADBB:%.*]]			; X64-NEXT: br label [[LOADBB:%.*]]
	; X64: res_block:			; X64: res_block:
	; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP7:%.]], [[LOADBB]] ]			; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP5:%.]], [[LOADBB]] ]
	; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ]			; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ]
	; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]			; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
	; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; X64-NEXT: br label [[ENDBLOCK:%.*]]			; X64-NEXT: br label [[ENDBLOCK:%.*]]
	; X64: loadbb:			; X64: loadbb:
	; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64			; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64
	; X64-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i64			; X64-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64-NEXT: [[TMP5:%.]] = load i64, i64 [[TMP3]]			; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
	; X64-NEXT: [[TMP6:%.]] = load i64, i64 [[TMP4]]			; X64-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])			; X64-NEXT: [[TMP7:%.]] = load i64, i64 [[TMP6]]
	; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])			; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
	; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]			; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
	; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.]], label [[RES_BLOCK:%.]]			; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.]], label [[RES_BLOCK:%.]]
	; X64: loadbb1:			; X64: loadbb1:
	; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64-NEXT: [[TMP11:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64-NEXT: [[TMP11:%.]] = load i8, i8 [[TMP10]]
	; X64-NEXT: [[TMP12:%.]] = load i8, i8 [[TMP10]]			; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
	; X64-NEXT: [[TMP13:%.]] = load i8, i8 [[TMP11]]			; X64-NEXT: [[TMP13:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32			; X64-NEXT: [[TMP14:%.]] = load i8, i8 [[TMP13]]
	; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32			; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
	; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]			; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP12]], [[TMP15]]
	; X64-NEXT: br label [[ENDBLOCK]]			; X64-NEXT: br label [[ENDBLOCK]]
	; X64: endblock:			; X64: endblock:
	; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; X64-NEXT: ret i32 [[PHI_RES]]			; X64-NEXT: ret i32 [[PHI_RES]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp10(			; X32-LABEL: @cmp10(
	; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 10)			; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 10)
	; X32-NEXT: ret i32 [[CALL]]			; X32-NEXT: ret i32 [[CALL]]
	;			;
	; X64-LABEL: @cmp10(			; X64-LABEL: @cmp10(
	; X64-NEXT: br label [[LOADBB:%.*]]			; X64-NEXT: br label [[LOADBB:%.*]]
	; X64: res_block:			; X64: res_block:
	; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP7:%.]], [[LOADBB]] ], [ [[TMP18:%.]], [[LOADBB1:%.]] ]			; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP5:%.]], [[LOADBB]] ], [ [[TMP14:%.]], [[LOADBB1:%.]] ]
	; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]			; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
	; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]			; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
	; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; X64-NEXT: br label [[ENDBLOCK:%.*]]			; X64-NEXT: br label [[ENDBLOCK:%.*]]
	; X64: loadbb:			; X64: loadbb:
	; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64			; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64
	; X64-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i64			; X64-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64-NEXT: [[TMP5:%.]] = load i64, i64 [[TMP3]]			; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
	; X64-NEXT: [[TMP6:%.]] = load i64, i64 [[TMP4]]			; X64-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])			; X64-NEXT: [[TMP7:%.]] = load i64, i64 [[TMP6]]
	; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])			; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
	; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]			; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
	; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X64: loadbb1:			; X64: loadbb1:
	; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i16*			; X64-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i16*
	; X64-NEXT: [[TMP12:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64-NEXT: [[TMP12:%.]] = load i16, i16 [[TMP11]]
	; X64-NEXT: [[TMP13:%.]] = bitcast i8 [[TMP12]] to i16*			; X64-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP12]])
	; X64-NEXT: [[TMP14:%.]] = load i16, i16 [[TMP11]]			; X64-NEXT: [[TMP14]] = zext i16 [[TMP13]] to i64
	; X64-NEXT: [[TMP15:%.]] = load i16, i16 [[TMP13]]			; X64-NEXT: [[TMP15:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])			; X64-NEXT: [[TMP16:%.]] = bitcast i8 [[TMP15]] to i16*
	; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])			; X64-NEXT: [[TMP17:%.]] = load i16, i16 [[TMP16]]
	; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64			; X64-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP17]])
	; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64			; X64-NEXT: [[TMP19]] = zext i16 [[TMP18]] to i64
	; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]			; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP14]], [[TMP19]]
	; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X64: endblock:			; X64: endblock:
	; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; X64-NEXT: ret i32 [[PHI_RES]]			; X64-NEXT: ret i32 [[PHI_RES]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
	ret i32 %call			ret i32 %call
	}			}
	Show All 10 Lines
	define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp12(			; X32-LABEL: @cmp12(
	; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 12)			; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 12)
	; X32-NEXT: ret i32 [[CALL]]			; X32-NEXT: ret i32 [[CALL]]
	;			;
	; X64-LABEL: @cmp12(			; X64-LABEL: @cmp12(
	; X64-NEXT: br label [[LOADBB:%.*]]			; X64-NEXT: br label [[LOADBB:%.*]]
	; X64: res_block:			; X64: res_block:
	; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP7:%.]], [[LOADBB]] ], [ [[TMP18:%.]], [[LOADBB1:%.]] ]			; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP5:%.]], [[LOADBB]] ], [ [[TMP14:%.]], [[LOADBB1:%.]] ]
	; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]			; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
	; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]			; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
	; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; X64-NEXT: br label [[ENDBLOCK:%.*]]			; X64-NEXT: br label [[ENDBLOCK:%.*]]
	; X64: loadbb:			; X64: loadbb:
	; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64			; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64
	; X64-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i64			; X64-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64-NEXT: [[TMP5:%.]] = load i64, i64 [[TMP3]]			; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
	; X64-NEXT: [[TMP6:%.]] = load i64, i64 [[TMP4]]			; X64-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])			; X64-NEXT: [[TMP7:%.]] = load i64, i64 [[TMP6]]
	; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])			; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
	; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]			; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
	; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X64: loadbb1:			; X64: loadbb1:
	; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i32*			; X64-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i32*
	; X64-NEXT: [[TMP12:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64-NEXT: [[TMP12:%.]] = load i32, i32 [[TMP11]]
	; X64-NEXT: [[TMP13:%.]] = bitcast i8 [[TMP12]] to i32*			; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP12]])
	; X64-NEXT: [[TMP14:%.]] = load i32, i32 [[TMP11]]			; X64-NEXT: [[TMP14]] = zext i32 [[TMP13]] to i64
	; X64-NEXT: [[TMP15:%.]] = load i32, i32 [[TMP13]]			; X64-NEXT: [[TMP15:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])			; X64-NEXT: [[TMP16:%.]] = bitcast i8 [[TMP15]] to i32*
	; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])			; X64-NEXT: [[TMP17:%.]] = load i32, i32 [[TMP16]]
	; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64			; X64-NEXT: [[TMP18:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
	; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64			; X64-NEXT: [[TMP19]] = zext i32 [[TMP18]] to i64
	; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]			; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP14]], [[TMP19]]
	; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X64: endblock:			; X64: endblock:
	; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; X64-NEXT: ret i32 [[PHI_RES]]			; X64-NEXT: ret i32 [[PHI_RES]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
	ret i32 %call			ret i32 %call
	}			}
	Show All 28 Lines
	define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp16(			; X32-LABEL: @cmp16(
	; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 16)			; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 16)
	; X32-NEXT: ret i32 [[CALL]]			; X32-NEXT: ret i32 [[CALL]]
	;			;
	; X64-LABEL: @cmp16(			; X64-LABEL: @cmp16(
	; X64-NEXT: br label [[LOADBB:%.*]]			; X64-NEXT: br label [[LOADBB:%.*]]
	; X64: res_block:			; X64: res_block:
	; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP7:%.]], [[LOADBB]] ], [ [[TMP16:%.]], [[LOADBB1:%.]] ]			; X64-NEXT: [[PHI_SRC1:%.]] = phi i64 [ [[TMP5:%.]], [[LOADBB]] ], [ [[TMP13:%.]], [[LOADBB1:%.]] ]
	; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]			; X64-NEXT: [[PHI_SRC2:%.]] = phi i64 [ [[TMP8:%.]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
	; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]			; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
	; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1			; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
	; X64-NEXT: br label [[ENDBLOCK:%.*]]			; X64-NEXT: br label [[ENDBLOCK:%.*]]
	; X64: loadbb:			; X64: loadbb:
	; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64			; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[X:%.]] to i64
	; X64-NEXT: [[TMP4:%.]] = bitcast i8 [[Y:%.]] to i64			; X64-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64-NEXT: [[TMP5:%.]] = load i64, i64 [[TMP3]]			; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
	; X64-NEXT: [[TMP6:%.]] = load i64, i64 [[TMP4]]			; X64-NEXT: [[TMP6:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])			; X64-NEXT: [[TMP7:%.]] = load i64, i64 [[TMP6]]
	; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])			; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
	; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]			; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
	; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X64: loadbb1:			; X64: loadbb1:
	; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i64*			; X64-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i64*
	; X64-NEXT: [[TMP12:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64-NEXT: [[TMP12:%.]] = load i64, i64 [[TMP11]]
	; X64-NEXT: [[TMP13:%.]] = bitcast i8 [[TMP12]] to i64*			; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP12]])
	; X64-NEXT: [[TMP14:%.]] = load i64, i64 [[TMP11]]			; X64-NEXT: [[TMP14:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64-NEXT: [[TMP15:%.]] = load i64, i64 [[TMP13]]			; X64-NEXT: [[TMP15:%.]] = bitcast i8 [[TMP14]] to i64*
	; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])			; X64-NEXT: [[TMP16:%.]] = load i64, i64 [[TMP15]]
	; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])			; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP16]])
	; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]			; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP13]], [[TMP17]]
	; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X64: endblock:			; X64: endblock:
	; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; X64-NEXT: ret i32 [[PHI_RES]]			; X64-NEXT: ret i32 [[PHI_RES]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
	ret i32 %call			ret i32 %call
	}			}

	define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp_eq2(			; ALL-LABEL: @cmp_eq2(
	; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16			; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16
	; ALL-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i16			; ALL-NEXT: [[TMP2:%.]] = load i16, i16 [[TMP1]]
	; ALL-NEXT: [[TMP3:%.]] = load i16, i16 [[TMP1]]			; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i16
	; ALL-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP2]]			; ALL-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP3]]
	; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]			; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP2]], [[TMP4]]
	; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32			; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
	; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0			; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
	; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; ALL-NEXT: ret i32 [[CONV]]			; ALL-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq3(			; X32-LABEL: @cmp_eq3(
	; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16			; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16
	; X32-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i16			; X32-NEXT: [[TMP2:%.]] = load i16, i16 [[TMP1]]
	; X32-NEXT: [[TMP3:%.]] = load i16, i16 [[TMP1]]			; X32-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i16
	; X32-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP2]]			; X32-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP3]]
	; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP2]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 2			; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 2
	; X32-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 2			; X32-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X32-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i16
	; X32-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X32-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 2
	; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16			; X32-NEXT: [[TMP10:%.]] = load i8, i8 [[TMP9]]
	; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16			; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i16
	; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]			; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP8]], [[TMP11]]
	; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]			; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
	; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0			; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
	; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq3(			; X64_1LD-LABEL: @cmp_eq3(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i16			; X64_1LD-NEXT: [[TMP2:%.]] = load i16, i16 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i16, i16 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i16
	; X64_1LD-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 2			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 2
	; X64_1LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 2			; X64_1LD-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X64_1LD-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 2
	; X64_1LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X64_1LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP8]]
	; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]			; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP7]], [[TMP9]]
	; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq3(			; X64_2LD-LABEL: @cmp_eq3(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i16
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i16			; X64_2LD-NEXT: [[TMP2:%.]] = load i16, i16 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i16, i16 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i16
	; X64_2LD-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i16, i16 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 2			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 2
	; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 2			; X64_2LD-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X64_2LD-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i16
	; X64_2LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 2
	; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16			; X64_2LD-NEXT: [[TMP10:%.]] = load i8, i8 [[TMP9]]
	; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16			; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i16
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 3)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; ALL-LABEL: @cmp_eq4(			; ALL-LABEL: @cmp_eq4(
	; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; ALL-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; ALL-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; ALL-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; ALL-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; ALL-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; ALL-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; ALL-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]			; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
	; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32			; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
	; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0			; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
	; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; ALL-NEXT: ret i32 [[CONV]]			; ALL-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq5(			; X32-LABEL: @cmp_eq5(
	; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X32-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X32-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X32-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X32-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4			; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X32-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X32-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X32-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
	; X32-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X32-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32			; X32-NEXT: [[TMP10:%.]] = load i8, i8 [[TMP9]]
	; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32			; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
	; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
	; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq5(			; X64_1LD-LABEL: @cmp_eq5(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X64_1LD-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X64_1LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X64_1LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X64_1LD-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X64_1LD-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X64_1LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X64_1LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP8]]
	; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]			; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP7]], [[TMP9]]
	; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq5(			; X64_2LD-LABEL: @cmp_eq5(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X64_2LD-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X64_2LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X64_2LD-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X64_2LD-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
	; X64_2LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32			; X64_2LD-NEXT: [[TMP10:%.]] = load i8, i8 [[TMP9]]
	; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32			; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 5)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq6(			; X32-LABEL: @cmp_eq6(
	; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X32-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X32-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X32-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X32-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4			; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X32-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*			; X32-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*
	; X32-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X32-NEXT: [[TMP8:%.]] = load i16, i16 [[TMP7]]
	; X32-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i16*			; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i32
	; X32-NEXT: [[TMP10:%.]] = load i16, i16 [[TMP7]]			; X32-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X32-NEXT: [[TMP11:%.]] = load i16, i16 [[TMP9]]			; X32-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i16*
	; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32			; X32-NEXT: [[TMP12:%.]] = load i16, i16 [[TMP11]]
	; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32			; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP12]] to i32
	; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]			; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP9]], [[TMP13]]
	; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]			; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
	; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0			; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
	; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq6(			; X64_1LD-LABEL: @cmp_eq6(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X64_1LD-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X64_1LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X64_1LD-NEXT: [[TMP8:%.]] = load i16, i16 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i16*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X64_1LD-NEXT: [[TMP10:%.]] = load i16, i16 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i16*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i16, i16 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i16, i16 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq6(			; X64_2LD-LABEL: @cmp_eq6(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X64_2LD-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X64_2LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X64_2LD-NEXT: [[TMP8:%.]] = load i16, i16 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i16*			; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i32
	; X64_2LD-NEXT: [[TMP10:%.]] = load i16, i16 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X64_2LD-NEXT: [[TMP11:%.]] = load i16, i16 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i16*
	; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32			; X64_2LD-NEXT: [[TMP12:%.]] = load i16, i16 [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32			; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP12]] to i32
	; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]			; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP9]], [[TMP13]]
	; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]			; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
	; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0			; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 6)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq7(			; X32-LABEL: @cmp_eq7(
	; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X32-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X32-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X32-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X32-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3			; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3
	; X32-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X32-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*
	; X32-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 3			; X32-NEXT: [[TMP8:%.]] = load i32, i32 [[TMP7]]
	; X32-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X32-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 3
	; X32-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X32-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i32*
	; X32-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X32-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP10]]
	; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
	; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq7(			; X64_1LD-LABEL: @cmp_eq7(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X64_1LD-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X64_1LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 3			; X64_1LD-NEXT: [[TMP8:%.]] = load i32, i32 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 3
	; X64_1LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i32*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq7(			; X64_2LD-LABEL: @cmp_eq7(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X64_2LD-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X64_2LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 3			; X64_2LD-NEXT: [[TMP8:%.]] = load i32, i32 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 3
	; X64_2LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i32*
	; X64_2LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP10]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq8(			; X32-LABEL: @cmp_eq8(
	; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32			; X32-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i32
	; X32-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i32			; X32-NEXT: [[TMP2:%.]] = load i32, i32 [[TMP1]]
	; X32-NEXT: [[TMP3:%.]] = load i32, i32 [[TMP1]]			; X32-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i32
	; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP2]]			; X32-NEXT: [[TMP4:%.]] = load i32, i32 [[TMP3]]
	; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4			; X32-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 4
	; X32-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X32-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*
	; X32-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 4			; X32-NEXT: [[TMP8:%.]] = load i32, i32 [[TMP7]]
	; X32-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X32-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 4
	; X32-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X32-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i32*
	; X32-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X32-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP10]]
	; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
	; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64-LABEL: @cmp_eq8(			; X64-LABEL: @cmp_eq8(
	; X64-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32			; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
	; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0			; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
	; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64-NEXT: ret i32 [[CONV]]			; X64-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq9(			; X32-LABEL: @cmp_eq9(
	; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 9)			; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 9)
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq9(			; X64_1LD-LABEL: @cmp_eq9(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_1LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_1LD-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X64_1LD-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_1LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X64_1LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP8]]
	; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]			; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP7]], [[TMP9]]
	; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq9(			; X64_2LD-LABEL: @cmp_eq9(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_2LD-NEXT: [[TMP7:%.]] = load i8, i8 [[TMP6]]
	; X64_2LD-NEXT: [[TMP8:%.]] = load i8, i8 [[TMP6]]			; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i64
	; X64_2LD-NEXT: [[TMP9:%.]] = load i8, i8 [[TMP7]]			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64			; X64_2LD-NEXT: [[TMP10:%.]] = load i8, i8 [[TMP9]]
	; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64			; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i64
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 9)
	Show All 10 Lines
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq10(			; X64_1LD-LABEL: @cmp_eq10(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_1LD-NEXT: [[TMP8:%.]] = load i16, i16 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i16*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_1LD-NEXT: [[TMP10:%.]] = load i16, i16 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i16*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i16, i16 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i16, i16 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq10(			; X64_2LD-LABEL: @cmp_eq10(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i16*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_2LD-NEXT: [[TMP8:%.]] = load i16, i16 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i16*			; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i64
	; X64_2LD-NEXT: [[TMP10:%.]] = load i16, i16 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_2LD-NEXT: [[TMP11:%.]] = load i16, i16 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i16*
	; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64			; X64_2LD-NEXT: [[TMP12:%.]] = load i16, i16 [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64			; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP12]] to i64
	; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]			; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP9]], [[TMP13]]
	; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]			; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
	; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0			; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
	; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 10)
	Show All 10 Lines
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq11(			; X64_1LD-LABEL: @cmp_eq11(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 3			; X64_1LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 3
	; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq11(			; X64_2LD-LABEL: @cmp_eq11(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 3			; X64_2LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 3
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 11)
	Show All 10 Lines
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq12(			; X64_1LD-LABEL: @cmp_eq12(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_1LD-NEXT: [[TMP8:%.]] = load i32, i32 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_1LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i32*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq12(			; X64_2LD-LABEL: @cmp_eq12(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_2LD-NEXT: [[TMP8:%.]] = load i32, i32 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X64_2LD-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
	; X64_2LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_2LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = bitcast i8 [[TMP10]] to i32*
	; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64			; X64_2LD-NEXT: [[TMP12:%.]] = load i32, i32 [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64			; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
	; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]			; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP9]], [[TMP13]]
	; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]			; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
	; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0			; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
	; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 12)
	Show All 10 Lines
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq13(			; X64_1LD-LABEL: @cmp_eq13(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 5			; X64_1LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 5
	; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq13(			; X64_2LD-LABEL: @cmp_eq13(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 5			; X64_2LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 5
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 13)
	Show All 10 Lines
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq14(			; X64_1LD-LABEL: @cmp_eq14(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 6			; X64_1LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 6
	; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq14(			; X64_2LD-LABEL: @cmp_eq14(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 6			; X64_2LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 6
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 14)
	Show All 10 Lines
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq15(			; X64_1LD-LABEL: @cmp_eq15(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 7			; X64_1LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 7
	; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq15(			; X64_2LD-LABEL: @cmp_eq15(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP3]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 7			; X64_2LD-NEXT: [[TMP8:%.]] = load i64, i64 [[TMP7]]
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = getelementptr i8, i8 [[Y]], i64 7
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = bitcast i8 [[TMP9]] to i64*
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP10]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 15)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq16(			; X32-LABEL: @cmp_eq16(
	; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 16)			; X32-NEXT: [[CALL:%.]] = tail call i32 @memcmp(i8 [[X:%.]], i8 [[Y:%.*]], i64 16)
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64-LABEL: @cmp_eq16(			; X64-LABEL: @cmp_eq16(
	; X64-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i128			; X64-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i128
	; X64-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i128			; X64-NEXT: [[TMP2:%.]] = load i128, i128 [[TMP1]]
	; X64-NEXT: [[TMP3:%.]] = load i128, i128 [[TMP1]]			; X64-NEXT: [[TMP3:%.]] = bitcast i8 [[Y:%.]] to i128
	; X64-NEXT: [[TMP4:%.]] = load i128, i128 [[TMP2]]			; X64-NEXT: [[TMP4:%.]] = load i128, i128 [[TMP3]]
	; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]			; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP2]], [[TMP4]]
	; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32			; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
	; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0			; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
	; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64-NEXT: ret i32 [[CONV]]			; X64-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

This is an archive of the discontinued LLVM Phabricator instance.

[ExpandMemCmp] Properly constant-fold all compares.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 247895

llvm/lib/CodeGen/ExpandMemCmp.cpp

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

llvm/test/CodeGen/PowerPC/memcmpIR.ll

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

llvm/test/CodeGen/X86/memcmp-optsize.ll

llvm/test/CodeGen/X86/memcmp-pgso.ll

llvm/test/CodeGen/X86/memcmp.ll

llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ExpandMemCmp] Properly constant-fold all compares.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 247895

llvm/lib/CodeGen/ExpandMemCmp.cpp

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

llvm/test/CodeGen/PowerPC/memcmpIR.ll

llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll

llvm/test/CodeGen/X86/memcmp-optsize.ll

llvm/test/CodeGen/X86/memcmp-pgso.ll

llvm/test/CodeGen/X86/memcmp.ll

llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

[ExpandMemCmp] Properly constant-fold all compares.
ClosedPublic