Diff 248430

llvm/lib/CodeGen/ExpandMemCmp.cpp

Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines	class MemCmpExpansion {
void emitLoadCompareBlock(unsigned BlockIndex);		void emitLoadCompareBlock(unsigned BlockIndex);
void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,		void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
unsigned &LoadIndex);		unsigned &LoadIndex);
void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);		void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
void emitMemCmpResultBlock();		void emitMemCmpResultBlock();
Value *getMemCmpExpansionZeroCase();		Value *getMemCmpExpansionZeroCase();
Value *getMemCmpEqZeroOneBlock();		Value *getMemCmpEqZeroOneBlock();
Value *getMemCmpOneBlock();		Value *getMemCmpOneBlock();
Value getPtrToElementAtOffset(Value Source, Type *LoadSizeType,		struct LoadPair {
uint64_t OffsetBytes);		Value *Lhs = nullptr;
		Value *Rhs = nullptr;
		};
		LoadPair getLoadPair(Type LoadSizeType, bool NeedsBSwap, Type CmpSizeType,
		unsigned OffsetBytes);

static LoadEntryVector		static LoadEntryVector
computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,		computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);		unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
static LoadEntryVector		static LoadEntryVector
computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,		computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
unsigned MaxNumLoads,		unsigned MaxNumLoads,
unsigned &NumLoadsNonOneByte);		unsigned &NumLoadsNonOneByte);
▲ Show 20 Lines • Show All 140 Lines • ▼ Show 20 Lines	void MemCmpExpansion::createLoadCmpBlocks() {
}		}
}		}

void MemCmpExpansion::createResultBlock() {		void MemCmpExpansion::createResultBlock() {
ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",		ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
EndBlock->getParent(), EndBlock);		EndBlock->getParent(), EndBlock);
}		}

/// Return a pointer to an element of type `LoadSizeType` at offset		MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
/// `OffsetBytes`.		bool NeedsBSwap,
Value MemCmpExpansion::getPtrToElementAtOffset(Value Source,		Type *CmpSizeType,
Type *LoadSizeType,		unsigned OffsetBytes) {
uint64_t OffsetBytes) {		// Get the memory source at offset `OffsetBytes`.
		Value *LhsSource = CI->getArgOperand(0);
		Value *RhsSource = CI->getArgOperand(1);
if (OffsetBytes > 0) {		if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());		auto *ByteType = Type::getInt8Ty(CI->getContext());
Source = Builder.CreateConstGEP1_64(		LhsSource = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),		ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
OffsetBytes);		OffsetBytes);
		RhsSource = Builder.CreateConstGEP1_64(
		ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
		OffsetBytes);
		}
		LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
		RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());

		// Create a constant or a load from the source.
		Value *Lhs = nullptr;
		if (auto *C = dyn_cast<Constant>(LhsSource))
		Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
		if (!Lhs)
		Lhs = Builder.CreateLoad(LoadSizeType, LhsSource);

		Value *Rhs = nullptr;
		if (auto *C = dyn_cast<Constant>(RhsSource))
		Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
		if (!Rhs)
		Rhs = Builder.CreateLoad(LoadSizeType, RhsSource);

		// Swap bytes if required.
		if (NeedsBSwap) {
		Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
		Intrinsic::bswap, LoadSizeType);
		Lhs = Builder.CreateCall(Bswap, Lhs);
		Rhs = Builder.CreateCall(Bswap, Rhs);
}		}
return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
		// Zero extend if required.
		if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
		Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
		Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
		}
		return {Lhs, Rhs};
}		}

// This function creates the IR instructions for loading and comparing 1 byte.		// This function creates the IR instructions for loading and comparing 1 byte.
// It loads 1 byte from each source of the memcmp parameters with the given		// It loads 1 byte from each source of the memcmp parameters with the given
// GEPIndex. It then subtracts the two loaded values and adds this result to the		// GEPIndex. It then subtracts the two loaded values and adds this result to the
// final phi node for selecting the memcmp result.		// final phi node for selecting the memcmp result.
void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,		void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
unsigned OffsetBytes) {		unsigned OffsetBytes) {
Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);		Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
Type *LoadSizeType = Type::getInt8Ty(CI->getContext());		const LoadPair Loads =
Value *Source1 =		getLoadPair(Type::getInt8Ty(CI->getContext()), /NeedsBSwap=/false,
getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);		Type::getInt32Ty(CI->getContext()), OffsetBytes);
Value *Source2 =		Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);

Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);

PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);		PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);

if (BlockIndex < (LoadCmpBlocks.size() - 1)) {		if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
// Early exit branch if difference found to EndBlock. Otherwise, continue to		// Early exit branch if difference found to EndBlock. Otherwise, continue to
// next LoadCmpBlock,		// next LoadCmpBlock,
Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,		Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
ConstantInt::get(Diff->getType(), 0));		ConstantInt::get(Diff->getType(), 0));
Show All 30 Lines	Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
// If we have multiple loads per block, we need to generate a composite		// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or. The type for the combinations is the largest load		// comparison using xor+or. The type for the combinations is the largest load
// type.		// type.
IntegerType *const MaxLoadType =		IntegerType *const MaxLoadType =
NumLoads == 1 ? nullptr		NumLoads == 1 ? nullptr
: IntegerType::get(CI->getContext(), MaxLoadSize * 8);		: IntegerType::get(CI->getContext(), MaxLoadSize * 8);
for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {		for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];		const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
		const LoadPair Loads = getLoadPair(
IntegerType *LoadSizeType =		IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);		/NeedsBSwap=/false, MaxLoadType, CurLoadEntry.Offset);

Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
CurLoadEntry.Offset);
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
CurLoadEntry.Offset);

// Get a constant or load a value for each source address.
Value *LoadSrc1 = nullptr;
if (auto *Source1C = dyn_cast<Constant>(Source1))
LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
if (!LoadSrc1)
LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);

Value *LoadSrc2 = nullptr;
if (auto *Source2C = dyn_cast<Constant>(Source2))
LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
if (!LoadSrc2)
LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (NumLoads != 1) {		if (NumLoads != 1) {
if (LoadSizeType != MaxLoadType) {
LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}
// If we have multiple loads per block, we need to generate a composite		// If we have multiple loads per block, we need to generate a composite
// comparison using xor+or.		// comparison using xor+or.
Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);		Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
Diff = Builder.CreateZExt(Diff, MaxLoadType);		Diff = Builder.CreateZExt(Diff, MaxLoadType);
XorList.push_back(Diff);		XorList.push_back(Diff);
} else {		} else {
// If there's only one load per block, we just compare the loaded values.		// If there's only one load per block, we just compare the loaded values.
Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);		Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
}		}
}		}

auto pairWiseOr = [&](std::vector<Value > &InList) -> std::vector<Value > {		auto pairWiseOr = [&](std::vector<Value > &InList) -> std::vector<Value > {
std::vector<Value *> OutList;		std::vector<Value *> OutList;
for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {		for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);		Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
OutList.push_back(Or);		OutList.push_back(Or);
▲ Show 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {

Type *LoadSizeType =		Type *LoadSizeType =
IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);		IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
Type MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize 8);		Type MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize 8);
assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");		assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");

Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);		Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);

Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,		const LoadPair Loads =
CurLoadEntry.Offset);		getLoadPair(LoadSizeType, /NeedsBSwap=/DL.isLittleEndian(), MaxLoadType,
Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
CurLoadEntry.Offset);		CurLoadEntry.Offset);

// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (DL.isLittleEndian()) {
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
}

if (LoadSizeType != MaxLoadType) {
LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
}

// Add the loaded values to the phi nodes for calculating memcmp result only		// Add the loaded values to the phi nodes for calculating memcmp result only
// if result is not used in a zero equality.		// if result is not used in a zero equality.
if (!IsUsedForZeroCmp) {		if (!IsUsedForZeroCmp) {
ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]);		ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]);		ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
}		}

Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);		Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))		BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
? EndBlock		? EndBlock
: LoadCmpBlocks[BlockIndex + 1];		: LoadCmpBlocks[BlockIndex + 1];
// Early exit branch if difference found to ResultBlock. Otherwise, continue		// Early exit branch if difference found to ResultBlock. Otherwise, continue
// to next LoadCmpBlock or EndBlock.		// to next LoadCmpBlock or EndBlock.
BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);		BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
Builder.Insert(CmpBr);		Builder.Insert(CmpBr);

▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines	Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
assert(LoadIndex == getNumLoads() && "some entries were not consumed");		assert(LoadIndex == getNumLoads() && "some entries were not consumed");
return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));		return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
}		}

/// A memcmp expansion that only has one block of load and compare can bypass		/// A memcmp expansion that only has one block of load and compare can bypass
/// the compare, branch, and phi IR that is required in the general case.		/// the compare, branch, and phi IR that is required in the general case.
Value *MemCmpExpansion::getMemCmpOneBlock() {		Value *MemCmpExpansion::getMemCmpOneBlock() {
Type LoadSizeType = IntegerType::get(CI->getContext(), Size 8);		Type LoadSizeType = IntegerType::get(CI->getContext(), Size 8);
Value *Source1 = CI->getArgOperand(0);		bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
Value *Source2 = CI->getArgOperand(1);

// Cast source to LoadSizeType*.
if (Source1->getType() != LoadSizeType)
Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
if (Source2->getType() != LoadSizeType)
Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());

// Load LoadSizeType from the base address.
Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);

if (DL.isLittleEndian() && Size != 1) {
Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::bswap, LoadSizeType);
LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
}

if (Size < 4) {
// The i8 and i16 cases don't need compares. We zext the loaded values and		// The i8 and i16 cases don't need compares. We zext the loaded values and
// subtract them to get the suitable negative, zero, or positive i32 result.		// subtract them to get the suitable negative, zero, or positive i32 result.
LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());		if (Size < 4) {
LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());		const LoadPair Loads =
return Builder.CreateSub(LoadSrc1, LoadSrc2);		getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
		/Offset/ 0);
		return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
}		}

		const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
		/Offset/ 0);
// The result of memcmp is negative, zero, or positive, so produce that by		// The result of memcmp is negative, zero, or positive, so produce that by
// subtracting 2 extended compare bits: sub (ugt, ult).		// subtracting 2 extended compare bits: sub (ugt, ult).
// If a target prefers to use selects to get -1/0/1, they should be able		// If a target prefers to use selects to get -1/0/1, they should be able
// to transform this later. The inverse transform (going from selects to math)		// to transform this later. The inverse transform (going from selects to math)
// may not be possible in the DAG because the selects got converted into		// may not be possible in the DAG because the selects got converted into
// branches before we got there.		// branches before we got there.
Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);		Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);		Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());		Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());		Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
return Builder.CreateSub(ZextUGT, ZextULT);		return Builder.CreateSub(ZextUGT, ZextULT);
}		}

// This function expands the memcmp call into an inline expansion and returns		// This function expands the memcmp call into an inline expansion and returns
// the memcmp result.		// the memcmp result.
Value *MemCmpExpansion::getMemCmpExpansion() {		Value *MemCmpExpansion::getMemCmpExpansion() {
▲ Show 20 Lines • Show All 275 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

Show First 20 Lines • Show All 84 Lines • ▼ Show 20 Lines	; CHECK-NEXT: blr
%cond = zext i1 %not.lnot to i32		%cond = zext i1 %not.lnot to i32
ret i32 %cond		ret i32 %cond
}		}

; Validate with > 0		; Validate with > 0
define signext i32 @zeroEqualityTest04() {		define signext i32 @zeroEqualityTest04() {
; CHECK-LABEL: zeroEqualityTest04:		; CHECK-LABEL: zeroEqualityTest04:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha		; CHECK-NEXT: b .LBB3_2
courbet (Author, Unsubmitted, Done): These are also real changes.
; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha
; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l
; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l
; CHECK-NEXT: ldbrx 3, 0, 6
; CHECK-NEXT: ldbrx 4, 0, 5
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: bne 0, .LBB3_2
; CHECK-NEXT: # %bb.1: # %loadbb1		; CHECK-NEXT: # %bb.1: # %loadbb1
; CHECK-NEXT: li 4, 8		; CHECK-NEXT: li 3, 0
; CHECK-NEXT: ldbrx 3, 6, 4
; CHECK-NEXT: ldbrx 4, 5, 4
; CHECK-NEXT: li 5, 0		; CHECK-NEXT: li 5, 0
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: li 4, 0
; CHECK-NEXT: beq 0, .LBB3_3		; CHECK-NEXT: b .LBB3_4
; CHECK-NEXT: .LBB3_2: # %res_block		; CHECK-NEXT: .LBB3_2:
		; CHECK-NEXT: li 3, 1
		; CHECK-NEXT: li 4, 3
		; CHECK-NEXT: sldi 3, 3, 58
		; CHECK-NEXT: sldi 4, 4, 56
		; CHECK-NEXT: # %bb.3: # %res_block
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: li 3, 1		; CHECK-NEXT: li 3, 1
; CHECK-NEXT: li 4, -1		; CHECK-NEXT: li 4, -1
; CHECK-NEXT: isel 5, 4, 3, 0		; CHECK-NEXT: isel 5, 4, 3, 0
; CHECK-NEXT: .LBB3_3: # %endblock		; CHECK-NEXT: .LBB3_4: # %endblock
; CHECK-NEXT: extsw 3, 5		; CHECK-NEXT: extsw 3, 5
; CHECK-NEXT: neg 3, 3		; CHECK-NEXT: neg 3, 3
; CHECK-NEXT: rldicl 3, 3, 1, 63		; CHECK-NEXT: rldicl 3, 3, 1, 63
; CHECK-NEXT: xori 3, 3, 1		; CHECK-NEXT: xori 3, 3, 1
; CHECK-NEXT: blr		; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8), i8 bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)		%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8), i8 bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
%not.cmp = icmp slt i32 %call, 1		%not.cmp = icmp slt i32 %call, 1
%. = zext i1 %not.cmp to i32		%. = zext i1 %not.cmp to i32
ret i32 %.		ret i32 %.
}		}

; Validate with < 0		; Validate with < 0
define signext i32 @zeroEqualityTest05() {		define signext i32 @zeroEqualityTest05() {
; CHECK-LABEL: zeroEqualityTest05:		; CHECK-LABEL: zeroEqualityTest05:
; CHECK: # %bb.0:		; CHECK: # %bb.0:
; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha		; CHECK-NEXT: li 3, 0
; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha		; CHECK-NEXT: li 4, 0
; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l
; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l
; CHECK-NEXT: ldbrx 3, 0, 6
; CHECK-NEXT: ldbrx 4, 0, 5
; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: bne 0, .LBB4_2
; CHECK-NEXT: # %bb.1: # %loadbb1		; CHECK-NEXT: # %bb.1: # %loadbb1
; CHECK-NEXT: li 4, 8		; CHECK-NEXT: li 3, 0
; CHECK-NEXT: ldbrx 3, 6, 4		; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: ldbrx 4, 5, 4		; CHECK-NEXT: lis 3, 768
; CHECK-NEXT: li 5, 0		; CHECK-NEXT: lis 4, 1024
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: # %bb.3: # %res_block
; CHECK-NEXT: beq 0, .LBB4_3
; CHECK-NEXT: .LBB4_2: # %res_block
; CHECK-NEXT: cmpld 3, 4		; CHECK-NEXT: cmpld 3, 4
; CHECK-NEXT: li 3, 1		; CHECK-NEXT: li 3, 1
; CHECK-NEXT: li 4, -1		; CHECK-NEXT: li 4, -1
; CHECK-NEXT: isel 5, 4, 3, 0		; CHECK-NEXT: isel 3, 4, 3, 0
; CHECK-NEXT: .LBB4_3: # %endblock		; CHECK-NEXT: # %bb.4: # %endblock
; CHECK-NEXT: nor 3, 5, 5		; CHECK-NEXT: nor 3, 3, 3
; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31		; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31
; CHECK-NEXT: blr		; CHECK-NEXT: blr
%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8), i8 bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)		%call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8), i8 bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)
%call.lobit = lshr i32 %call, 31		%call.lobit = lshr i32 %call, 31
%call.lobit.not = xor i32 %call.lobit, 1		%call.lobit.not = xor i32 %call.lobit, 1
ret i32 %call.lobit.not		ret i32 %call.lobit.not
}		}

▲ Show 20 Lines • Show All 60 Lines • Show Last 20 Lines

llvm/test/CodeGen/PowerPC/memcmpIR.ll

Show All 14 Lines	entry:
; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64		; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64
; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1		; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
; CHECK-NEXT: br label %endblock		; CHECK-NEXT: br label %endblock

; CHECK-LABEL: loadbb1:{{.*}}		; CHECK-LABEL: loadbb1:{{.*}}
; CHECK: [[BCC1:%[0-9]+]] = bitcast i32* {{.}} to i8		; CHECK: [[BCC1:%[0-9]+]] = bitcast i32* {{.}} to i8
; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.}} to i8		; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.}} to i8
; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8		; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8		; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
		; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*		; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]		; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]		; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])		; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]])
; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])		; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]])
; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]		; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]]
; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block		; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block

; CHECK-BE-LABEL: @test1(		; CHECK-BE-LABEL: @test1(
; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*		; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64*
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*		; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64*
; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]		; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block		; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block

; CHECK-BE-LABEL: res_block:{{.*}}		; CHECK-BE-LABEL: res_block:{{.*}}
; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64		; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64
; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1		; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1
; CHECK-BE-NEXT: br label %endblock		; CHECK-BE-NEXT: br label %endblock

; CHECK-BE-LABEL: loadbb1:{{.*}}		; CHECK-BE-LABEL: loadbb1:{{.*}}
; CHECK-BE: [[BCC1:%[0-9]+]] = bitcast i32* {{.}} to i8		; CHECK-BE: [[BCC1:%[0-9]+]] = bitcast i32* {{.}} to i8
; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.}} to i8		; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.}} to i8
; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8		; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8		; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
		; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*		; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]		; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]		; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]		; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block		; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block

%0 = bitcast i32* %buffer1 to i8*		%0 = bitcast i32* %buffer1 to i8*
%1 = bitcast i32* %buffer2 to i8*		%1 = bitcast i32* %buffer2 to i8*
▲ Show 20 Lines • Show All 135 Lines • Show Last 20 Lines

llvm/test/CodeGen/X86/memcmp.ll

Show First 20 Lines • Show All 92 Lines • ▼ Show 20 Lines	; X64-NEXT: retq
ret i32 %m		ret i32 %m
}		}

define i32 @length2_const(i8* %X, i8* %Y) nounwind {		define i32 @length2_const(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_const:		; X86-LABEL: length2_const:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax		; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl .L.str+1, %ecx
; X86-NEXT: rolw $8, %ax		; X86-NEXT: rolw $8, %ax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
		courbet (Author, Unsubmitted, Done): This is the real change.
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_const:		; X64-LABEL: length2_const:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
ret i32 %m		ret i32 %m
}		}

define i1 @length2_gt_const(i8* %X, i8* %Y) nounwind {		define i1 @length2_gt_const(i8* %X, i8* %Y) nounwind {
; X86-LABEL: length2_gt_const:		; X86-LABEL: length2_gt_const:
; X86: # %bb.0:		; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax		; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl (%eax), %eax		; X86-NEXT: movzwl (%eax), %eax
; X86-NEXT: movzwl .L.str+1, %ecx
; X86-NEXT: rolw $8, %ax		; X86-NEXT: rolw $8, %ax
; X86-NEXT: rolw $8, %cx
; X86-NEXT: movzwl %ax, %eax		; X86-NEXT: movzwl %ax, %eax
; X86-NEXT: movzwl %cx, %ecx		; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: testl %eax, %eax		; X86-NEXT: testl %eax, %eax
; X86-NEXT: setg %al		; X86-NEXT: setg %al
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length2_gt_const:		; X64-LABEL: length2_gt_const:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movzwl (%rdi), %eax		; X64-NEXT: movzwl (%rdi), %eax
; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
; X64-NEXT: rolw $8, %ax		; X64-NEXT: rolw $8, %ax
; X64-NEXT: rolw $8, %cx
; X64-NEXT: movzwl %ax, %eax		; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzwl %cx, %ecx		; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
; X64-NEXT: subl %ecx, %eax
; X64-NEXT: testl %eax, %eax		; X64-NEXT: testl %eax, %eax
; X64-NEXT: setg %al		; X64-NEXT: setg %al
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
%c = icmp sgt i32 %m, 0		%c = icmp sgt i32 %m, 0
ret i1 %c		ret i1 %c
}		}

▲ Show 20 Lines • Show All 275 Lines • ▼ Show 20 Lines
; X86-NEXT: # kill: def $al killed $al killed $eax		; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl		; X86-NEXT: retl
;		;
; X64-LABEL: length4_lt:		; X64-LABEL: length4_lt:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %ecx		; X64-NEXT: movl (%rdi), %ecx
; X64-NEXT: movl (%rsi), %edx		; X64-NEXT: movl (%rsi), %edx
; X64-NEXT: bswapl %ecx		; X64-NEXT: bswapl %ecx
; X64-NEXT: bswapl %edx		; X64-NEXT: bswapl %edx
		efriedma (Unsubmitted, Not Done): The scheduling here seems to be worse?
		courbet (Author, Unsubmitted, Done): I think we could argue either way: the new scheduling interleaves loads and other computations, evening out the port pressure and increasing compute parallelism. On the other hand, it's true that there is less data parallelism. It should not matter with recent out-of-order cores anyway. If you feel strongly about this I can go back to interleaving the data; this is actually orthogonal to the real change.
		efriedma (Unsubmitted, Not Done): I'd prefer to avoid unrelated changes. Regarding "the new scheduling interleaves loads and other computations": scheduling an arithmetic operation that uses a loaded value immediately after the load is never going to work out; loads have latency. Granted, I agree it's unlikely to matter much on a modern x86 core. Really, I'm more surprised we aren't trying to do any scheduling at all after isel.
; X64-NEXT: xorl %eax, %eax		; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %edx, %ecx		; X64-NEXT: cmpl %edx, %ecx
; X64-NEXT: seta %al		; X64-NEXT: seta %al
; X64-NEXT: sbbl $0, %eax		; X64-NEXT: sbbl $0, %eax
; X64-NEXT: shrl $31, %eax		; X64-NEXT: shrl $31, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq		; X64-NEXT: retq
%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind		%m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
▲ Show 20 Lines • Show All 4,353 Lines • Show Last 20 Lines

llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

	Show First 20 Lines • Show All 125 Lines • ▼ Show 20 Lines
	; ALL-NEXT: [[TMP5:%.]] = load i32, i32 [[TMP3]]			; ALL-NEXT: [[TMP5:%.]] = load i32, i32 [[TMP3]]
	; ALL-NEXT: [[TMP6:%.]] = load i32, i32 [[TMP4]]			; ALL-NEXT: [[TMP6:%.]] = load i32, i32 [[TMP4]]
	; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])			; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
	; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])			; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
	; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]			; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
	; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; ALL: loadbb1:			; ALL: loadbb1:
	; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4			; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
	; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*			; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
	; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4			; ALL-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
	; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*			; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
	; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]			; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
	; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]			; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
	; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])			; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
	; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])			; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
	; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32			; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
	; ALL-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32			; ALL-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32
	; ALL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]			; ALL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
	; ALL-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; ALL-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; ALL: endblock:			; ALL: endblock:
	Show All 28 Lines
	; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]			; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
	; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]			; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
	; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])			; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
	; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])			; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
	; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]			; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
	; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X32: loadbb1:			; X32: loadbb1:
	; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4			; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
	; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*			; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
	; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4			; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
	; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*			; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
	; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]			; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
	; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]			; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
	; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])			; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
	; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])			; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
	; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]			; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
	; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X32: endblock:			; X32: endblock:
	; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; X32-NEXT: ret i32 [[PHI_RES]]			; X32-NEXT: ret i32 [[PHI_RES]]
	▲ Show 20 Lines • Show All 74 Lines • ▼ Show 20 Lines
	; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]			; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
	; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]			; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
	; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])			; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
	; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])			; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
	; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]			; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
	; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X64: loadbb1:			; X64: loadbb1:
	; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8			; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
	; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*			; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
	; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8			; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
	; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*			; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
	; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]			; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
	; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]			; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
	; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])			; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
	; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])			; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
	; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64			; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
	; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64			; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
	; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]			; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
	; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X64: endblock:			; X64: endblock:
	Show All 32 Lines
	; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]			; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
	; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]			; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
	; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])			; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
	; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])			; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
	; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]			; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
	; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X64: loadbb1:			; X64: loadbb1:
	; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8			; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
	; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*			; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
	; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8			; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
	; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*			; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
	; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]			; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
	; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]			; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
	; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])			; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
	; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])			; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
	; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64			; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
	; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64			; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64
	; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]			; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
	; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X64: endblock:			; X64: endblock:
	▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines
	; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]			; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
	; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]			; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
	; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])			; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
	; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])			; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
	; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]			; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
	; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]			; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
	; X64: loadbb1:			; X64: loadbb1:
	; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8			; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
	; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64*			; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
	; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8			; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64*
	; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i64*			; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64*
	; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP11]]			; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]]
	; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]			; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
	; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])			; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
	; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])			; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
	; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]			; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
	; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]			; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
	; X64: endblock:			; X64: endblock:
	; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]			; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
	; X64-NEXT: ret i32 [[PHI_RES]]			; X64-NEXT: ret i32 [[PHI_RES]]
	▲ Show 20 Lines • Show All 183 Lines • ▼ Show 20 Lines
	define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq6(			; X32-LABEL: @cmp_eq6(
	; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*			; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
	; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*			; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
	; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]			; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
	; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]			; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
	; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4			; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
	; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*			; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
	; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4			; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
	; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*			; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
	; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]			; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
	; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]			; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
	; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32			; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
	; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32			; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
	; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]			; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
	; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]			; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
	; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0			; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
	; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq6(			; X64_1LD-LABEL: @cmp_eq6(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*			; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
	; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*			; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
	; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4			; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
	; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*			; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
	; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4			; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
	; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*			; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
	; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq6(			; X64_2LD-LABEL: @cmp_eq6(
	; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*			; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
	; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*			; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
	; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4			; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
	; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*			; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
	; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4			; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
	; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*			; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
	; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32			; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
	; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32			; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
	; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]			; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
	; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]			; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
	; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0			; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	Show All 9 Lines
	define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq7(			; X32-LABEL: @cmp_eq7(
	; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*			; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
	; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*			; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
	; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]			; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
	; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]			; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
	; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3			; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
	; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*			; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
	; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3			; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
	; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*			; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
	; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]			; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
	; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]			; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
	; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
	; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	;			;
	; X64_1LD-LABEL: @cmp_eq7(			; X64_1LD-LABEL: @cmp_eq7(
	; X64_1LD-NEXT: br label [[LOADBB:%.*]]			; X64_1LD-NEXT: br label [[LOADBB:%.*]]
	; X64_1LD: res_block:			; X64_1LD: res_block:
	; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]			; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
	; X64_1LD: loadbb:			; X64_1LD: loadbb:
	; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*			; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
	; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*			; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
	; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3			; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
	; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*			; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
	; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3			; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
	; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*			; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
	; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq7(			; X64_2LD-LABEL: @cmp_eq7(
	; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*			; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
	; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*			; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
	; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3			; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
	; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*			; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
	; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3			; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
	; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*			; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
	; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	;			;
	%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)			%call = tail call i32 @memcmp(i8* %x, i8* %y, i64 7)
	%cmp = icmp eq i32 %call, 0			%cmp = icmp eq i32 %call, 0
	%conv = zext i1 %cmp to i32			%conv = zext i1 %cmp to i32
	ret i32 %conv			ret i32 %conv
	}			}

	define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {			define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
	; X32-LABEL: @cmp_eq8(			; X32-LABEL: @cmp_eq8(
	; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*			; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
	; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*			; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
	; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]			; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
	; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]			; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
	; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]			; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
	; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4			; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
	; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*			; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
	; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4			; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
	; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*			; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
	; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]			; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
	; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]			; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
	; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]			; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
	; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]			; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
	; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0			; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
	; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X32-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X32-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X32-NEXT: ret i32 [[CONV]]			; X32-NEXT: ret i32 [[CONV]]
	▲ Show 20 Lines • Show All 87 Lines • ▼ Show 20 Lines
	; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*			; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
	; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*			; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
	; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8			; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
	; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*			; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
	; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8			; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
	; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*			; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
	; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq10(			; X64_2LD-LABEL: @cmp_eq10(
	; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*			; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
	; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*			; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
	; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8			; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
	; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*			; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
	; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8			; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
	; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*			; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
	; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64			; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
	; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64			; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
	; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]			; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
	; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]			; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
	; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0			; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
	; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	Show All 21 Lines
	; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*			; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
	; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*			; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
	; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3			; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
	; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
	; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3			; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
	; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq11(			; X64_2LD-LABEL: @cmp_eq11(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 3
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 3
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 3			; X64_2LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i64*
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	Show All 19 Lines
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X64_1LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_1LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i32*
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i32*
	; X64_1LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq12(			; X64_2LD-LABEL: @cmp_eq12(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 8
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i32*			; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 8
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 8			; X64_2LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i32*
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i32*			; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i32*
	; X64_2LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = load i32, i32 [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i32, i32 [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64			; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
	; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64			; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
	; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]			; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
	; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]			; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
	; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0			; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
	; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32			; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
	Show All 21 Lines
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 5
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 5			; X64_1LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i64*
	; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq13(			; X64_2LD-LABEL: @cmp_eq13(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 5
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 5
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 5			; X64_2LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i64*
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	Show All 19 Lines
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 6
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 6			; X64_1LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i64*
	; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq14(			; X64_2LD-LABEL: @cmp_eq14(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 6
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 6
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 6			; X64_2LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i64*
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	Show All 19 Lines
	; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_1LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_1LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_1LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_1LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]			; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
	; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]			; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.]], label [[LOADBB1:%.]]
	; X64_1LD: loadbb1:			; X64_1LD: loadbb1:
	; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7			; X64_1LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7
	; X64_1LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_1LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 7
	; X64_1LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 7			; X64_1LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_1LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i64*
	; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_1LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP8]]
	; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_1LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]
	; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]			; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
	; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]			; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
	; X64_1LD: endblock:			; X64_1LD: endblock:
	; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]			; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
	; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0			; X64_1LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
	; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_1LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_1LD-NEXT: ret i32 [[CONV]]			; X64_1LD-NEXT: ret i32 [[CONV]]
	;			;
	; X64_2LD-LABEL: @cmp_eq15(			; X64_2LD-LABEL: @cmp_eq15(
	; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64			; X64_2LD-NEXT: [[TMP1:%.]] = bitcast i8 [[X:%.]] to i64
	; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64			; X64_2LD-NEXT: [[TMP2:%.]] = bitcast i8 [[Y:%.]] to i64
	; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]			; X64_2LD-NEXT: [[TMP3:%.]] = load i64, i64 [[TMP1]]
	; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]			; X64_2LD-NEXT: [[TMP4:%.]] = load i64, i64 [[TMP2]]
	; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]			; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
	; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7			; X64_2LD-NEXT: [[TMP6:%.]] = getelementptr i8, i8 [[X]], i64 7
	; X64_2LD-NEXT: [[TMP7:%.]] = bitcast i8 [[TMP6]] to i64*			; X64_2LD-NEXT: [[TMP7:%.]] = getelementptr i8, i8 [[Y]], i64 7
	; X64_2LD-NEXT: [[TMP8:%.]] = getelementptr i8, i8 [[Y]], i64 7			; X64_2LD-NEXT: [[TMP8:%.]] = bitcast i8 [[TMP6]] to i64*
	; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP8]] to i64*			; X64_2LD-NEXT: [[TMP9:%.]] = bitcast i8 [[TMP7]] to i64*
	; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP7]]			; X64_2LD-NEXT: [[TMP10:%.]] = load i64, i64 [[TMP8]]
	; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]			; X64_2LD-NEXT: [[TMP11:%.]] = load i64, i64 [[TMP9]]
	; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]			; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
	; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]			; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
	; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0			; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
	; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32			; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
	; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0			; X64_2LD-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
	; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32			; X64_2LD-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
	; X64_2LD-NEXT: ret i32 [[CONV]]			; X64_2LD-NEXT: ret i32 [[CONV]]
	Show All 31 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[ExpandMemCmp] Properly constant-fold all compares.
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 248430

llvm/lib/CodeGen/ExpandMemCmp.cpp

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

llvm/test/CodeGen/PowerPC/memcmpIR.ll

llvm/test/CodeGen/X86/memcmp.ll

llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

This is an archive of the discontinued LLVM Phabricator instance.

[ExpandMemCmp] Properly constant-fold all compares.ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 248430

llvm/lib/CodeGen/ExpandMemCmp.cpp

llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll

llvm/test/CodeGen/PowerPC/memcmpIR.ll

llvm/test/CodeGen/X86/memcmp.ll

llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll

[ExpandMemCmp] Properly constant-fold all compares.
ClosedPublic