diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp --- a/llvm/lib/CodeGen/ExpandMemCmp.cpp +++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -103,8 +103,12 @@ Value *getMemCmpExpansionZeroCase(); Value *getMemCmpEqZeroOneBlock(); Value *getMemCmpOneBlock(); - Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType, - uint64_t OffsetBytes); + struct LoadPair { + Value *Lhs = nullptr; + Value *Rhs = nullptr; + }; + LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType, + unsigned OffsetBytes); static LoadEntryVector computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes, @@ -261,18 +265,42 @@ EndBlock->getParent(), EndBlock); } -/// Return a pointer to an element of type `LoadSizeType` at offset -/// `OffsetBytes`. -Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source, - Type *LoadSizeType, - uint64_t OffsetBytes) { - if (OffsetBytes > 0) { - auto *ByteType = Type::getInt8Ty(CI->getContext()); - Source = Builder.CreateConstGEP1_64( - ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), - OffsetBytes); - } - return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); +MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType, + bool NeedsBSwap, + Type *CmpSizeType, + unsigned OffsetBytes) { + const auto MakeValue = [this, LoadSizeType, NeedsBSwap, CmpSizeType, + OffsetBytes](int Index) { + // Get the memory source at offset `OffsetBytes`. + Value *Source = CI->getArgOperand(Index); + if (OffsetBytes > 0) { + auto *ByteType = Type::getInt8Ty(CI->getContext()); + Source = Builder.CreateConstGEP1_64( + ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()), + OffsetBytes); + } + Source = Builder.CreateBitCast(Source, LoadSizeType->getPointerTo()); + + // Create a constant or a load from the source. + Value *V = nullptr; + if (auto *C = dyn_cast<Constant>(Source)) + V = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL); + if (!V) + V = Builder.CreateLoad(LoadSizeType, Source); + + // Swap bytes if required. + if (NeedsBSwap) { + Function *Bswap = Intrinsic::getDeclaration( + CI->getModule(), Intrinsic::bswap, LoadSizeType); + V = Builder.CreateCall(Bswap, V); + } + + // Zero extend if required. + if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) + V = Builder.CreateZExt(V, CmpSizeType); + return V; + }; + return {MakeValue(0), MakeValue(1)}; }
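As a point of reference, here is a minimal standalone sketch (illustrative only, not part of this patch; the function name and the sizes are made up) of the IR that getLoadPair emits for one operand when LoadSizeType is i16, OffsetBytes is 4, the target is little-endian (so NeedsBSwap is true), and CmpSizeType is i64:

  ; Load 2 bytes at offset 4, byte-swap so an unsigned compare matches byte
  ; order in memory, then widen to the common compare type.
  declare i16 @llvm.bswap.i16(i16)
  define i64 @load_one_operand(i8* %src) {
    %gep = getelementptr i8, i8* %src, i64 4
    %ptr = bitcast i8* %gep to i16*
    %val = load i16, i16* %ptr
    %bsw = call i16 @llvm.bswap.i16(i16 %val)
    %ext = zext i16 %bsw to i64
    ret i64 %ext
  }

When the source pointer is a constant global, ConstantFoldLoadFromConstPtr replaces the load with a constant, which is what lets the updated length2_const tests further below fold one operand into an immediate.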
// This function creates the IR instructions for loading and comparing 1 byte. @@ -282,18 +310,10 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes) { Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - Type *LoadSizeType = Type::getInt8Ty(CI->getContext()); - Value *Source1 = - getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes); - Value *Source2 = - getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes); - - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext())); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext())); - Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); + const LoadPair Loads = + getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false, + Type::getInt32Ty(CI->getContext()), OffsetBytes); + Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs); PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]); @@ -340,41 +360,19 @@ : IntegerType::get(CI->getContext(), MaxLoadSize * 8); for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) { const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex]; - - IntegerType *LoadSizeType = - IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); - - Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, - CurLoadEntry.Offset); - Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, - CurLoadEntry.Offset); - - // Get a constant or load a value for each source address. - Value *LoadSrc1 = nullptr; - if (auto *Source1C = dyn_cast<Constant>(Source1)) - LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL); - if (!LoadSrc1) - LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - - Value *LoadSrc2 = nullptr; - if (auto *Source2C = dyn_cast<Constant>(Source2)) - LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL); - if (!LoadSrc2) - LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); + const LoadPair Loads = getLoadPair( + IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), + /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset); if (NumLoads != 1) { - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } // If we have multiple loads per block, we need to generate a composite // comparison using xor+or. - Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs); Diff = Builder.CreateZExt(Diff, MaxLoadType); XorList.push_back(Diff); } else { // If there's only one load per block, we just compare the loaded values. - Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs); } } @@ -451,35 +449,18 @@ Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]); - Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, - CurLoadEntry.Offset); - Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, - CurLoadEntry.Offset); - - // Load LoadSizeType from the base address.
- Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian()) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } - - if (LoadSizeType != MaxLoadType) { - LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType); - } + const LoadPair Loads = + getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType, + CurLoadEntry.Offset); // Add the loaded values to the phi nodes for calculating memcmp result only // if result is not used in a zero equality. if (!IsUsedForZeroCmp) { - ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]); - ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]); + ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]); } - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs); BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) ? EndBlock : LoadCmpBlocks[BlockIndex + 1]; @@ -568,42 +549,27 @@ /// the compare, branch, and phi IR that is required in the general case. Value *MemCmpExpansion::getMemCmpOneBlock() { Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); - Value *Source1 = CI->getArgOperand(0); - Value *Source2 = CI->getArgOperand(1); - - // Cast source to LoadSizeType*. - if (Source1->getType() != LoadSizeType) - Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo()); - if (Source2->getType() != LoadSizeType) - Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo()); - - // Load LoadSizeType from the base address. - Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1); - Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2); - - if (DL.isLittleEndian() && Size != 1) { - Function *Bswap = Intrinsic::getDeclaration(CI->getModule(), - Intrinsic::bswap, LoadSizeType); - LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1); - LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2); - } + bool NeedsBSwap = DL.isLittleEndian() && Size != 1; + // The i8 and i16 cases don't need compares. We zext the loaded values and + // subtract them to get the suitable negative, zero, or positive i32 result. if (Size < 4) { - // The i8 and i16 cases don't need compares. We zext the loaded values and - // subtract them to get the suitable negative, zero, or positive i32 result. - LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty()); - LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty()); - return Builder.CreateSub(LoadSrc1, LoadSrc2); + const LoadPair Loads = + getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(), + /*Offset*/ 0); + return Builder.CreateSub(Loads.Lhs, Loads.Rhs); } + const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType, + /*Offset*/ 0); // The result of memcmp is negative, zero, or positive, so produce that by // subtracting 2 extended compare bits: sub (ugt, ult). // If a target prefers to use selects to get -1/0/1, they should be able // to transform this later. The inverse transform (going from selects to math) // may not be possible in the DAG because the selects got converted into // branches before we got there. 
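To make the trick in the comment above concrete, a hypothetical standalone sketch (not taken from this patch) of the sub (ugt, ult) idiom: each icmp produces an i1 that zext widens to 0 or 1, so the subtraction yields 1 when %a > %b, -1 when %a < %b, and 0 when they are equal, which is exactly the sign information memcmp must return:

  define i32 @memcmp_sign(i64 %a, i64 %b) {
    %ugt = icmp ugt i64 %a, %b
    %ult = icmp ult i64 %a, %b
    %gt = zext i1 %ugt to i32
    %lt = zext i1 %ult to i32
    %res = sub i32 %gt, %lt
    ret i32 %res
  }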
- Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2); - Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2); + Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs); + Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs); Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty()); Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty()); return Builder.CreateSub(ZextUGT, ZextULT); diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll --- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -90,27 +90,23 @@ define signext i32 @zeroEqualityTest04() { ; CHECK-LABEL: zeroEqualityTest04: ; CHECK: # %bb.0: -; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha -; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha -; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l -; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l -; CHECK-NEXT: ldbrx 3, 0, 6 -; CHECK-NEXT: ldbrx 4, 0, 5 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: bne 0, .LBB3_2 +; CHECK-NEXT: b .LBB3_2 ; CHECK-NEXT: # %bb.1: # %loadbb1 -; CHECK-NEXT: li 4, 8 -; CHECK-NEXT: ldbrx 3, 6, 4 -; CHECK-NEXT: ldbrx 4, 5, 4 +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: li 5, 0 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: beq 0, .LBB3_3 -; CHECK-NEXT: .LBB3_2: # %res_block +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: b .LBB3_4 +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: li 4, 3 +; CHECK-NEXT: sldi 3, 3, 58 +; CHECK-NEXT: sldi 4, 4, 56 +; CHECK-NEXT: # %bb.3: # %res_block ; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: li 4, -1 ; CHECK-NEXT: isel 5, 4, 3, 0 -; CHECK-NEXT: .LBB3_3: # %endblock +; CHECK-NEXT: .LBB3_4: # %endblock ; CHECK-NEXT: extsw 3, 5 ; CHECK-NEXT: neg 3, 3 ; CHECK-NEXT: rldicl 3, 3, 1, 63 @@ -126,28 +122,20 @@ define signext i32 @zeroEqualityTest05() { ; CHECK-LABEL: zeroEqualityTest05: ; CHECK: # %bb.0: -; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha -; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha -; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l -; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l -; CHECK-NEXT: ldbrx 3, 0, 6 -; CHECK-NEXT: ldbrx 4, 0, 5 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: bne 0, .LBB4_2 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: # %bb.1: # %loadbb1 -; CHECK-NEXT: li 4, 8 -; CHECK-NEXT: ldbrx 3, 6, 4 -; CHECK-NEXT: ldbrx 4, 5, 4 -; CHECK-NEXT: li 5, 0 -; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: beq 0, .LBB4_3 -; CHECK-NEXT: .LBB4_2: # %res_block +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: lis 3, 768 +; CHECK-NEXT: lis 4, 1024 +; CHECK-NEXT: # %bb.3: # %res_block ; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: isel 5, 4, 3, 0 -; CHECK-NEXT: .LBB4_3: # %endblock -; CHECK-NEXT: nor 3, 5, 5 +; CHECK-NEXT: isel 3, 4, 3, 0 +; CHECK-NEXT: # %bb.4: # %endblock +; CHECK-NEXT: nor 3, 3, 3 ; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16) diff --git a/llvm/test/CodeGen/PowerPC/memcmpIR.ll b/llvm/test/CodeGen/PowerPC/memcmpIR.ll --- a/llvm/test/CodeGen/PowerPC/memcmpIR.ll +++ b/llvm/test/CodeGen/PowerPC/memcmpIR.ll @@ -5,8 +5,9 @@ entry: ; CHECK-LABEL: @test1( ; CHECK: 
[[LOAD1:%[0-9]+]] = load i64, i64* - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) + ; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i64* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block @@ -21,17 +22,18 @@ ; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8* ; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8 ; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64* + ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]] + ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8 ; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64* - ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]] ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]] - ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] ; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block ; CHECK-BE-LABEL: @test1( ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64* + ; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i8* {{.*}} to i64* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block @@ -46,9 +48,9 @@ ; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8* ; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8 ; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64* + ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]] ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8 ; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64* - ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]] ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]] ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block @@ -64,8 +66,9 @@ define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { ; CHECK-LABEL: @test2( ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32* - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) + ; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) ; CHECK-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[BSWAP1]], [[BSWAP2]] ; CHECK-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[BSWAP1]], [[BSWAP2]] @@ -76,6 +79,7 @@ ; CHECK-BE-LABEL: @test2( ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32* + ; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-BE-NEXT: [[CMP1:%[0-9]+]] = icmp ugt i32 [[LOAD1]], [[LOAD2]] ; CHECK-BE-NEXT: [[CMP2:%[0-9]+]] = icmp ult i32 [[LOAD1]], [[LOAD2]] @@ -93,8 +97,9 @@ define signext i32 @test3(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { ; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64* - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load 
i64, i64* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) + ; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i64* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block @@ -106,33 +111,39 @@ ; CHECK-LABEL: loadbb1:{{.*}} ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32* - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64 + ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 8 + ; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64 ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block ; CHECK-LABEL: loadbb2:{{.*}} ; CHECK: [[LOAD1:%[0-9]+]] = load i16, i16* - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]]) - ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]]) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64 + ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 12 + ; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i16* + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* + ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]]) ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64 ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block ; CHECK-LABEL: loadbb3:{{.*}} ; CHECK: [[LOAD1:%[0-9]+]] = load i8, i8* - ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 + ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 14 + ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32 ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] ; CHECK-NEXT: br label %endblock ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64* + ; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i64* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block @@ -142,23 +153,31 @@ ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 ; CHECK-BE-NEXT: br label %endblock + ; CHECK-BE-LABEL: loadbb1:{{.*}} ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32* - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64 + ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 8 + ; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i32* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64 ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block + ; CHECK-BE-LABEL: loadbb2:{{.*}} ; CHECK-BE: [[LOAD1:%[0-9]+]] = load 
i16, i16* - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64 + ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 12 + ; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* {{.*}} to i16* + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64 ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block + ; CHECK-BE-LABEL: loadbb3:{{.*}} ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, i8* - ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 + ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* {{.*}}, i64 14 + ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i8 [[LOAD2]] to i32 ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i32 [[ZEXT1]], [[ZEXT2]] ; CHECK-BE-NEXT: br label %endblock diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -69,24 +69,24 @@ define i32 @length2(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: length2: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq @@ -118,14 +118,14 @@ define i1 @length2_lt(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length2_lt: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -134,10 +134,10 @@ ; X64-LABEL: length2_lt: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: shrl $31, %eax @@ -154,10 +154,10 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %ax ; X86-NEXT: movzwl %cx, %ecx +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: subl %eax, %ecx ; X86-NEXT: testl %ecx, %ecx @@ -167,10 +167,10 @@ ; X64-LABEL: length2_gt: ; X64: # %bb.0: ; 
X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: testl %eax, %eax @@ -235,8 +235,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzwl (%ecx), %esi ; X86-NEXT: rolw $8, %dx +; X86-NEXT: movzwl (%ecx), %esi ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx ; X86-NEXT: jne .LBB9_1 @@ -256,8 +256,8 @@ ; X64-LABEL: length3: ; X64: # %bb.0: # %loadbb ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax +; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax ; X64-NEXT: jne .LBB9_1 @@ -310,8 +310,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -322,8 +322,8 @@ ; X64-LABEL: length4: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx @@ -361,8 +361,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -375,8 +375,8 @@ ; X64-LABEL: length4_lt: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx @@ -396,8 +396,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %eax ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %eax ; X86-NEXT: bswapl %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: cmpl %eax, %ecx @@ -410,8 +410,8 @@ ; X64-LABEL: length4_gt: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %ecx ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpl %ecx, %eax @@ -450,8 +450,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %edx +; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx ; X86-NEXT: jne .LBB16_1 @@ -471,8 +471,8 @@ ; X64-LABEL: length5: ; X64: # %bb.0: # %loadbb ; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB16_1 @@ -526,8 +526,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %edx +; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx ; X86-NEXT: jne .LBB18_1 @@ -549,8 +549,8 @@ ; X64-LABEL: length5_lt: ; X64: # %bb.0: # %loadbb ; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: 
bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB18_1 @@ -607,15 +607,15 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: jne .LBB20_2 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -632,8 +632,8 @@ ; X64-LABEL: length8: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -830,22 +830,22 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: jne .LBB27_3 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: jne .LBB27_3 ; X86-NEXT: # %bb.2: # %loadbb2 ; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl 8(%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -862,15 +862,15 @@ ; X64-LABEL: length12: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB27_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx ; X64-NEXT: bswapl %ecx +; X64-NEXT: movl 8(%rsi), %edx ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -1000,38 +1000,38 @@ ; X86: # %bb.0: ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl (%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl (%eax), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: jne .LBB31_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl 4(%eax), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: jne .LBB31_4 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl 8(%eax), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: jne .LBB31_4 ; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx +; X86-NEXT: movl 12(%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx +; X86-NEXT: movl 12(%eax), %esi +; X86-NEXT: bswapl %esi ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: je .LBB31_5 ; X86-NEXT: .LBB31_4: 
# %res_block ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: setae %al ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB31_5: # %endblock @@ -1041,15 +1041,15 @@ ; X64-LABEL: length16: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB31_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -1175,38 +1175,38 @@ ; X86: # %bb.0: ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl (%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl (%eax), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: jne .LBB33_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx +; X86-NEXT: movl 4(%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl 4(%eax), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: jne .LBB33_4 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %ecx -; X86-NEXT: movl 8(%eax), %edx +; X86-NEXT: movl 8(%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: movl 8(%eax), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: jne .LBB33_4 ; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %ecx -; X86-NEXT: movl 12(%eax), %edx +; X86-NEXT: movl 12(%edx), %ecx ; X86-NEXT: bswapl %ecx -; X86-NEXT: bswapl %edx +; X86-NEXT: movl 12(%eax), %esi +; X86-NEXT: bswapl %esi ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: je .LBB33_5 ; X86-NEXT: .LBB33_4: # %res_block ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: cmpl %esi, %ecx ; X86-NEXT: setae %al ; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB33_5: # %endblock @@ -1218,15 +1218,15 @@ ; X64-LABEL: length16_lt: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB33_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -1249,43 +1249,43 @@ ; X86-LABEL: length16_gt: ; X86: # %bb.0: ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esi), %eax -; X86-NEXT: movl (%edx), %ecx +; X86-NEXT: movl (%edx), %eax ; X86-NEXT: bswapl %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl (%ecx), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %eax ; X86-NEXT: jne .LBB34_4 ; X86-NEXT: # %bb.1: # %loadbb1 -; X86-NEXT: movl 4(%esi), %eax -; X86-NEXT: movl 4(%edx), %ecx +; X86-NEXT: movl 4(%edx), %eax ; X86-NEXT: bswapl %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl 
4(%ecx), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %eax ; X86-NEXT: jne .LBB34_4 ; X86-NEXT: # %bb.2: # %loadbb2 -; X86-NEXT: movl 8(%esi), %eax -; X86-NEXT: movl 8(%edx), %ecx +; X86-NEXT: movl 8(%edx), %eax ; X86-NEXT: bswapl %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl 8(%ecx), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: cmpl %esi, %eax ; X86-NEXT: jne .LBB34_4 ; X86-NEXT: # %bb.3: # %loadbb3 -; X86-NEXT: movl 12(%esi), %eax -; X86-NEXT: movl 12(%edx), %ecx +; X86-NEXT: movl 12(%edx), %eax ; X86-NEXT: bswapl %eax -; X86-NEXT: bswapl %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %ecx, %eax +; X86-NEXT: movl 12(%ecx), %esi +; X86-NEXT: bswapl %esi +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: cmpl %esi, %eax ; X86-NEXT: je .LBB34_5 ; X86-NEXT: .LBB34_4: # %res_block -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %ecx, %eax -; X86-NEXT: setae %dl -; X86-NEXT: leal -1(%edx,%edx), %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: cmpl %esi, %eax +; X86-NEXT: setae %cl +; X86-NEXT: leal -1(%ecx,%ecx), %ecx ; X86-NEXT: .LBB34_5: # %endblock -; X86-NEXT: testl %edx, %edx +; X86-NEXT: testl %ecx, %ecx ; X86-NEXT: setg %al ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -1293,15 +1293,15 @@ ; X64-LABEL: length16_gt: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq (%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB34_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax @@ -1433,22 +1433,22 @@ ; X64-LABEL: length24: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB36_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB36_3 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -1592,22 +1592,22 @@ ; X64-LABEL: length24_lt: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB38_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB38_3 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -1642,22 +1642,22 @@ ; X64-LABEL: length24_gt: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq (%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB39_3 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq 8(%rsi), %rcx 
; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB39_3 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax -; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax @@ -2187,29 +2187,29 @@ ; X64-LABEL: length32: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB47_4 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB47_4 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB47_4 ; X64-NEXT: # %bb.3: # %loadbb3 ; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 24(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -2367,29 +2367,29 @@ ; X64-LABEL: length32_lt: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB49_4 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB49_4 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rcx -; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 16(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB49_4 ; X64-NEXT: # %bb.3: # %loadbb3 ; X64-NEXT: movq 24(%rdi), %rcx -; X64-NEXT: movq 24(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 24(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -2424,29 +2424,29 @@ ; X64-LABEL: length32_gt: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq (%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB50_4 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rax -; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq 8(%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB50_4 ; X64-NEXT: # %bb.2: # %loadbb2 ; X64-NEXT: movq 16(%rdi), %rax -; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq 16(%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: cmpq %rcx, %rax ; X64-NEXT: jne .LBB50_4 ; X64-NEXT: # %bb.3: # %loadbb3 ; X64-NEXT: movq 24(%rdi), %rax -; X64-NEXT: movq 24(%rsi), %rcx ; X64-NEXT: bswapq %rax +; X64-NEXT: movq 24(%rsi), %rcx ; X64-NEXT: bswapq %rcx ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpq %rcx, %rax diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -16,24 +16,24 @@ define i32 @length2(i8* %X, i8* %Y) nounwind optsize { ; X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: length2: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq @@ -116,8 +116,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzwl (%ecx), %esi ; X86-NEXT: rolw $8, %dx +; X86-NEXT: movzwl (%ecx), %esi ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx ; X86-NEXT: jne .LBB4_1 @@ -137,8 +137,8 @@ ; X64-LABEL: length3: ; X64: # %bb.0: # %loadbb ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax +; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax ; X64-NEXT: jne .LBB4_1 @@ -191,8 +191,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -203,8 +203,8 @@ ; X64-LABEL: length4: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx @@ -261,8 +261,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %edx +; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx ; X86-NEXT: jne .LBB9_1 @@ -282,8 +282,8 @@ ; X64-LABEL: length5: ; X64: # %bb.0: # %loadbb ; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB9_1 @@ -337,15 +337,15 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: jne .LBB11_2 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -362,8 +362,8 @@ ; X64-LABEL: length8: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -462,15 +462,15 @@ ; X64-LABEL: length12: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB15_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx ; X64-NEXT: bswapl %ecx +; X64-NEXT: movl 8(%rsi), %edx ; 
X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -502,15 +502,15 @@ ; X64-LABEL: length16: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB16_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -1021,24 +1021,24 @@ define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind optsize { ; X86-LABEL: bcmp_length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: bcmp_length2: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll --- a/llvm/test/CodeGen/X86/memcmp-pgso.ll +++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll @@ -16,24 +16,24 @@ define i32 @length2(i8* %X, i8* %Y) nounwind !prof !14 { ; X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: length2: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq @@ -116,8 +116,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %edx -; X86-NEXT: movzwl (%ecx), %esi ; X86-NEXT: rolw $8, %dx +; X86-NEXT: movzwl (%ecx), %esi ; X86-NEXT: rolw $8, %si ; X86-NEXT: cmpw %si, %dx ; X86-NEXT: jne .LBB4_1 @@ -137,8 +137,8 @@ ; X64-LABEL: length3: ; X64: # %bb.0: # %loadbb ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax +; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %cx ; X64-NEXT: cmpw %cx, %ax ; X64-NEXT: jne .LBB4_1 @@ -191,8 +191,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -203,8 +203,8 @@ ; X64-LABEL: length4: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx +; 
X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl %edx, %ecx @@ -261,8 +261,8 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %edx -; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %edx +; X86-NEXT: movl (%ecx), %esi ; X86-NEXT: bswapl %esi ; X86-NEXT: cmpl %esi, %edx ; X86-NEXT: jne .LBB9_1 @@ -282,8 +282,8 @@ ; X64-LABEL: length5: ; X64: # %bb.0: # %loadbb ; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rsi), %ecx ; X64-NEXT: bswapl %ecx ; X64-NEXT: cmpl %ecx, %eax ; X64-NEXT: jne .LBB9_1 @@ -337,15 +337,15 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl (%esi), %ecx -; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: jne .LBB11_2 ; X86-NEXT: # %bb.1: # %loadbb1 ; X86-NEXT: movl 4(%esi), %ecx -; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %ecx +; X86-NEXT: movl 4(%eax), %edx ; X86-NEXT: bswapl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx @@ -362,8 +362,8 @@ ; X64-LABEL: length8: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -462,15 +462,15 @@ ; X64-LABEL: length12: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB15_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movl 8(%rdi), %ecx -; X64-NEXT: movl 8(%rsi), %edx ; X64-NEXT: bswapl %ecx +; X64-NEXT: movl 8(%rsi), %edx ; X64-NEXT: bswapl %edx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -502,15 +502,15 @@ ; X64-LABEL: length16: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: cmpq %rdx, %rcx ; X64-NEXT: jne .LBB16_2 ; X64-NEXT: # %bb.1: # %loadbb1 ; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rcx +; X64-NEXT: movq 8(%rsi), %rdx ; X64-NEXT: bswapq %rdx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx @@ -1021,24 +1021,24 @@ define i32 @bcmp_length2(i8* %X, i8* %Y) nounwind !prof !14 { ; X86-LABEL: bcmp_length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: bcmp_length2: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -68,24 +68,24 @@ define i32 @length2(i8* %X, i8* %Y) nounwind { ; 
X86-LABEL: length2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: length2: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq @@ -98,23 +98,17 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: movzwl .L.str+1, %ecx ; X86-NEXT: rolw $8, %ax -; X86-NEXT: rolw $8, %cx ; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: movzwl %cx, %ecx -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: addl $-12594, %eax # imm = 0xCECE ; X86-NEXT: retl ; ; X64-LABEL: length2_const: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax +; X64-NEXT: addl $-12594, %eax # imm = 0xCECE ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind ret i32 %m @@ -125,12 +119,9 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: movzwl .L.str+1, %ecx ; X86-NEXT: rolw $8, %ax -; X86-NEXT: rolw $8, %cx ; X86-NEXT: movzwl %ax, %eax -; X86-NEXT: movzwl %cx, %ecx -; X86-NEXT: subl %ecx, %eax +; X86-NEXT: addl $-12594, %eax # imm = 0xCECE ; X86-NEXT: testl %eax, %eax ; X86-NEXT: setg %al ; X86-NEXT: retl @@ -138,12 +129,9 @@ ; X64-LABEL: length2_gt_const: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax -; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: subl %ecx, %eax +; X64-NEXT: addl $-12594, %eax # imm = 0xCECE ; X64-NEXT: testl %eax, %eax ; X64-NEXT: setg %al ; X64-NEXT: retq @@ -176,14 +164,14 @@ define i1 @length2_lt(i8* %X, i8* %Y) nounwind { ; X86-LABEL: length2_lt: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: rolw $8, %cx -; X86-NEXT: rolw $8, %dx -; X86-NEXT: movzwl %cx, %eax -; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: shrl $31, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -192,10 +180,10 @@ ; X64-LABEL: length2_lt: ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax -; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax -; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl (%rsi), %ecx +; X64-NEXT: rolw $8, %cx ; X64-NEXT: movzwl %cx, %ecx ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: shrl $31, %eax @@ -212,10 +200,10 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: 
movzwl (%eax), %eax
 ; X86-NEXT: rolw $8, %cx
-; X86-NEXT: rolw $8, %ax
 ; X86-NEXT: movzwl %cx, %ecx
+; X86-NEXT: movzwl (%eax), %eax
+; X86-NEXT: rolw $8, %ax
 ; X86-NEXT: movzwl %ax, %eax
 ; X86-NEXT: subl %eax, %ecx
 ; X86-NEXT: testl %ecx, %ecx
@@ -225,10 +213,10 @@
 ; X64-LABEL: length2_gt:
 ; X64: # %bb.0:
 ; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: movzwl (%rsi), %ecx
+; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: movzwl %cx, %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: testl %eax, %eax
@@ -293,8 +281,8 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movzwl (%eax), %edx
-; X86-NEXT: movzwl (%ecx), %esi
 ; X86-NEXT: rolw $8, %dx
+; X86-NEXT: movzwl (%ecx), %esi
 ; X86-NEXT: rolw $8, %si
 ; X86-NEXT: cmpw %si, %dx
 ; X86-NEXT: jne .LBB11_1
@@ -314,8 +302,8 @@
 ; X64-LABEL: length3:
 ; X64: # %bb.0: # %loadbb
 ; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %ax
+; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: cmpw %cx, %ax
 ; X64-NEXT: jne .LBB11_1
@@ -368,8 +356,8 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
 ; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl (%eax), %edx
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: xorl %eax, %eax
 ; X86-NEXT: cmpl %edx, %ecx
@@ -380,8 +368,8 @@
 ; X64-LABEL: length4:
 ; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
 ; X64-NEXT: bswapl %ecx
+; X64-NEXT: movl (%rsi), %edx
 ; X64-NEXT: bswapl %edx
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpl %edx, %ecx
@@ -419,8 +407,8 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %edx
 ; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl (%eax), %edx
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: xorl %eax, %eax
 ; X86-NEXT: cmpl %edx, %ecx
@@ -433,8 +421,8 @@
 ; X64-LABEL: length4_lt:
 ; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: movl (%rsi), %edx
 ; X64-NEXT: bswapl %ecx
+; X64-NEXT: movl (%rsi), %edx
 ; X64-NEXT: bswapl %edx
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpl %edx, %ecx
@@ -454,8 +442,8 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl (%ecx), %ecx
-; X86-NEXT: movl (%eax), %eax
 ; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl (%eax), %eax
 ; X86-NEXT: bswapl %eax
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: cmpl %eax, %ecx
@@ -468,8 +456,8 @@
 ; X64-LABEL: length4_gt:
 ; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
+; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: xorl %edx, %edx
 ; X64-NEXT: cmpl %ecx, %eax
@@ -508,8 +496,8 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
 ; X86-NEXT: bswapl %edx
+; X86-NEXT: movl (%ecx), %esi
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
 ; X86-NEXT: jne .LBB18_1
@@ -529,8 +517,8 @@
 ; X64-LABEL: length5:
 ; X64: # %bb.0: # %loadbb
 ; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
+; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
 ; X64-NEXT: jne .LBB18_1
@@ -584,8 +572,8 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: movl (%ecx), %esi
 ; X86-NEXT: bswapl %edx
+; X86-NEXT: movl (%ecx), %esi
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
 ; X86-NEXT: jne .LBB20_1
@@ -607,8 +595,8 @@
 ; X64-LABEL: length5_lt:
 ; X64: # %bb.0: # %loadbb
 ; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
+; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
 ; X64-NEXT: jne .LBB20_1
@@ -665,15 +653,15 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: movl (%esi), %ecx
-; X86-NEXT: movl (%eax), %edx
 ; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl (%eax), %edx
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: cmpl %edx, %ecx
 ; X86-NEXT: jne .LBB22_2
 ; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movl 4(%esi), %ecx
-; X86-NEXT: movl 4(%eax), %edx
 ; X86-NEXT: bswapl %ecx
+; X86-NEXT: movl 4(%eax), %edx
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: xorl %eax, %eax
 ; X86-NEXT: cmpl %edx, %ecx
@@ -690,8 +678,8 @@
 ; X64-LABEL: length8:
 ; X64: # %bb.0:
 ; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rcx
+; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rdx
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpq %rdx, %rcx
@@ -873,15 +861,15 @@
 ; X64-LABEL: length12:
 ; X64: # %bb.0:
 ; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rcx
+; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rdx
 ; X64-NEXT: cmpq %rdx, %rcx
 ; X64-NEXT: jne .LBB29_2
 ; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movl 8(%rdi), %ecx
-; X64-NEXT: movl 8(%rsi), %edx
 ; X64-NEXT: bswapl %ecx
+; X64-NEXT: movl 8(%rsi), %edx
 ; X64-NEXT: bswapl %edx
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpq %rdx, %rcx
@@ -1042,15 +1030,15 @@
 ; X64-LABEL: length16:
 ; X64: # %bb.0:
 ; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rcx
+; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rdx
 ; X64-NEXT: cmpq %rdx, %rcx
 ; X64-NEXT: jne .LBB35_2
 ; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
 ; X64-NEXT: bswapq %rcx
+; X64-NEXT: movq 8(%rsi), %rdx
 ; X64-NEXT: bswapq %rdx
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpq %rdx, %rcx
@@ -1171,15 +1159,15 @@
 ; X64-LABEL: length16_lt:
 ; X64: # %bb.0:
 ; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rcx
+; X64-NEXT: movq (%rsi), %rdx
 ; X64-NEXT: bswapq %rdx
 ; X64-NEXT: cmpq %rdx, %rcx
 ; X64-NEXT: jne .LBB37_2
 ; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movq 8(%rdi), %rcx
-; X64-NEXT: movq 8(%rsi), %rdx
 ; X64-NEXT: bswapq %rcx
+; X64-NEXT: movq 8(%rsi), %rdx
 ; X64-NEXT: bswapq %rdx
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: cmpq %rdx, %rcx
@@ -1214,15 +1202,15 @@
 ; X64-LABEL: length16_gt:
 ; X64: # %bb.0:
 ; X64-NEXT: movq (%rdi), %rax
-; X64-NEXT: movq (%rsi), %rcx
 ; X64-NEXT: bswapq %rax
+; X64-NEXT: movq (%rsi), %rcx
 ; X64-NEXT: bswapq %rcx
 ; X64-NEXT: cmpq %rcx, %rax
 ; X64-NEXT: jne .LBB38_2
 ; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movq 8(%rdi), %rax
-; X64-NEXT: movq 8(%rsi), %rcx
 ; X64-NEXT: bswapq %rax
+; X64-NEXT: movq 8(%rsi), %rcx
 ; X64-NEXT: bswapq %rcx
 ; X64-NEXT: xorl %edx, %edx
 ; X64-NEXT: cmpq %rcx, %rax
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -8,14 +8,14 @@
 define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; ALL-LABEL: @cmp2(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
-; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
-; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
-; ALL-NEXT: [[TMP8:%.*]] = zext i16 [[TMP6]] to i32
-; ALL-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP8]]
+; ALL-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT: [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; ALL-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
+; ALL-NEXT: [[TMP5:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP5]]
+; ALL-NEXT: [[TMP7:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
+; ALL-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32
+; ALL-NEXT: [[TMP9:%.*]] = sub i32 [[TMP4]], [[TMP8]]
 ; ALL-NEXT: ret i32 [[TMP9]]
 ;
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2)
@@ -26,28 +26,28 @@
 ; ALL-LABEL: @cmp3(
 ; ALL-NEXT: br label [[LOADBB:%.*]]
 ; ALL: res_block:
-; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i16 [ [[TMP7:%.*]], [[LOADBB]] ]
+; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i16 [ [[TMP5:%.*]], [[LOADBB]] ]
 ; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i16 [ [[TMP8:%.*]], [[LOADBB]] ]
 ; ALL-NEXT: [[TMP1:%.*]] = icmp ult i16 [[PHI_SRC1]], [[PHI_SRC2]]
 ; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; ALL-NEXT: br label [[ENDBLOCK:%.*]]
 ; ALL: loadbb:
 ; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]]
-; ALL-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
-; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
-; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+; ALL-NEXT: [[TMP5]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; ALL-NEXT: [[TMP7:%.*]] = load i16, i16* [[TMP6]]
+; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP7]])
+; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP5]], [[TMP8]]
 ; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
 ; ALL: loadbb1:
 ; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 2
-; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]]
+; ALL-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
+; ALL-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i64 2
+; ALL-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP13]]
+; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
+; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP12]], [[TMP15]]
 ; ALL-NEXT: br label [[ENDBLOCK]]
 ; ALL: endblock:
 ; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -60,13 +60,13 @@
 define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; ALL-LABEL: @cmp4(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
-; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
-; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
-; ALL-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]]
+; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]]
+; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
+; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP6]]
+; ALL-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP3]], [[TMP6]]
 ; ALL-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
 ; ALL-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
 ; ALL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
@@ -80,28 +80,28 @@
 ; ALL-LABEL: @cmp5(
 ; ALL-NEXT: br label [[LOADBB:%.*]]
 ; ALL: res_block:
-; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ]
+; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ]
 ; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ]
 ; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
 ; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; ALL-NEXT: br label [[ENDBLOCK:%.*]]
 ; ALL: loadbb:
 ; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
-; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; ALL-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]]
+; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP7]])
+; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], [[TMP8]]
 ; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
 ; ALL: loadbb1:
 ; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; ALL-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]]
+; ALL-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
+; ALL-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; ALL-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP13]]
+; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
+; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP12]], [[TMP15]]
 ; ALL-NEXT: br label [[ENDBLOCK]]
 ; ALL: endblock:
 ; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -115,32 +115,32 @@
 ; ALL-LABEL: @cmp6(
 ; ALL-NEXT: br label [[LOADBB:%.*]]
 ; ALL: res_block:
-; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
+; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
 ; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
 ; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
 ; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; ALL-NEXT: br label [[ENDBLOCK:%.*]]
 ; ALL: loadbb:
 ; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
-; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; ALL-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; ALL-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]]
+; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP7]])
+; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], [[TMP8]]
 ; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; ALL: loadbb1:
 ; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
-; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
-; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
-; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
-; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
-; ALL-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i32
-; ALL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP18]], [[TMP19]]
+; ALL-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP11]]
+; ALL-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP12]])
+; ALL-NEXT: [[TMP14]] = zext i16 [[TMP13]] to i32
+; ALL-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; ALL-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to i16*
+; ALL-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]]
+; ALL-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP17]])
+; ALL-NEXT: [[TMP19]] = zext i16 [[TMP18]] to i32
+; ALL-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP14]], [[TMP19]]
 ; ALL-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
 ; ALL: endblock:
 ; ALL-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -163,30 +163,30 @@
 ; X32-LABEL: @cmp8(
 ; X32-NEXT: br label [[LOADBB:%.*]]
 ; X32: res_block:
-; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
+; X32-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1:%.*]] ]
 ; X32-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
 ; X32-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
 ; X32-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; X32-NEXT: br label [[ENDBLOCK:%.*]]
 ; X32: loadbb:
 ; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
-; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
-; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
-; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
-; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X32-NEXT: [[TMP5]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
+; X32-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]]
+; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP7]])
+; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP5]], [[TMP8]]
 ; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X32: loadbb1:
 ; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
-; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
-; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
-; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
-; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
+; X32-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP13]] = call i32 @llvm.bswap.i32(i32 [[TMP12]])
+; X32-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X32-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i32*
+; X32-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]]
+; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP16]])
+; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP13]], [[TMP17]]
 ; X32-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
 ; X32: endblock:
 ; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -194,13 +194,13 @@
 ;
 ; X64-LABEL: @cmp8(
 ; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
-; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
-; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
-; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP5]], [[TMP6]]
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]]
+; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
+; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP3]], [[TMP6]]
+; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP3]], [[TMP6]]
 ; X64-NEXT: [[TMP9:%.*]] = zext i1 [[TMP7]] to i32
 ; X64-NEXT: [[TMP10:%.*]] = zext i1 [[TMP8]] to i32
 ; X64-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP10]]
@@ -218,28 +218,28 @@
 ; X64-LABEL: @cmp9(
 ; X64-NEXT: br label [[LOADBB:%.*]]
 ; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ]
 ; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ]
 ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
 ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; X64-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]]
+; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
 ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1:%.*]], label [[RES_BLOCK:%.*]]
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
-; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
-; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
-; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
-; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
+; X64-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]]
+; X64-NEXT: [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
+; X64-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP13]]
+; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP14]] to i32
+; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP12]], [[TMP15]]
 ; X64-NEXT: br label [[ENDBLOCK]]
 ; X64: endblock:
 ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ [[TMP16]], [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -257,32 +257,32 @@
 ; X64-LABEL: @cmp10(
 ; X64-NEXT: br label [[LOADBB:%.*]]
 ; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
 ; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
 ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
 ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; X64-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]]
+; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
 ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
-; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
-; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
-; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
-; X64-NEXT: [[TMP19]] = zext i16 [[TMP17]] to i64
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
+; X64-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP11]]
+; X64-NEXT: [[TMP13:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP12]])
+; X64-NEXT: [[TMP14]] = zext i16 [[TMP13]] to i64
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to i16*
+; X64-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]]
+; X64-NEXT: [[TMP18:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP17]])
+; X64-NEXT: [[TMP19]] = zext i16 [[TMP18]] to i64
+; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP14]], [[TMP19]]
 ; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
 ; X64: endblock:
 ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -309,32 +309,32 @@
 ; X64-LABEL: @cmp12(
 ; X64-NEXT: br label [[LOADBB:%.*]]
 ; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP18:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP14:%.*]], [[LOADBB1:%.*]] ]
 ; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP19:%.*]], [[LOADBB1]] ]
 ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
 ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; X64-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]]
+; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
 ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
-; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
-; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
-; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
-; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
-; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
-; X64-NEXT: [[TMP19]] = zext i32 [[TMP17]] to i64
-; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP18]], [[TMP19]]
+; X64-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]]
+; X64-NEXT: [[TMP13:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP12]])
+; X64-NEXT: [[TMP14]] = zext i32 [[TMP13]] to i64
+; X64-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to i32*
+; X64-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]]
+; X64-NEXT: [[TMP18:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP17]])
+; X64-NEXT: [[TMP19]] = zext i32 [[TMP18]] to i64
+; X64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP14]], [[TMP19]]
 ; X64-NEXT: br i1 [[TMP20]], label [[ENDBLOCK]], label [[RES_BLOCK]]
 ; X64: endblock:
 ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -379,30 +379,30 @@
 ; X64-LABEL: @cmp16(
 ; X64-NEXT: br label [[LOADBB:%.*]]
 ; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ], [ [[TMP16:%.*]], [[LOADBB1:%.*]] ]
+; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP5:%.*]], [[LOADBB]] ], [ [[TMP13:%.*]], [[LOADBB1:%.*]] ]
 ; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ], [ [[TMP17:%.*]], [[LOADBB1]] ]
 ; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
 ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; X64-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64: loadbb:
 ; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
-; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
-; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
-; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
-; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64-NEXT: [[TMP5]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
+; X64-NEXT: [[TMP6:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]]
+; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP7]])
+; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP5]], [[TMP8]]
 ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64*
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i64*
-; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP11]]
-; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
-; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
-; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
-; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
+; X64-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]]
+; X64-NEXT: [[TMP13]] = call i64 @llvm.bswap.i64(i64 [[TMP12]])
+; X64-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i64*
+; X64-NEXT: [[TMP16:%.*]] = load i64, i64* [[TMP15]]
+; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP16]])
+; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP13]], [[TMP17]]
 ; X64-NEXT: br i1 [[TMP18]], label [[ENDBLOCK]], label [[RES_BLOCK]]
 ; X64: endblock:
 ; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ [[TMP2]], [[RES_BLOCK]] ]
@@ -415,10 +415,10 @@
 define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; ALL-LABEL: @cmp_eq2(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
+; ALL-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP2]], [[TMP4]]
 ; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
@@ -433,17 +433,17 @@
 define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; X32-LABEL: @cmp_eq3(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP2]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
-; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
-; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
+; X32-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i16
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 2
+; X32-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i16
+; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP8]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
 ; X32-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
 ; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -457,17 +457,17 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 2
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP8]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP7]], [[TMP9]]
 ; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -477,17 +477,17 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq3(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i16, i16* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i16*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i16
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 2
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i16
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP8]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -504,10 +504,10 @@
 define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; ALL-LABEL: @cmp_eq4(
 ; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; ALL-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
 ; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
 ; ALL-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
@@ -522,17 +522,17 @@
 define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; X32-LABEL: @cmp_eq5(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
-; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X32-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X32-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]]
+; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 ; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -546,17 +546,17 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP8]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP7]], [[TMP9]]
 ; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -566,17 +566,17 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq5(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -593,19 +593,19 @@
 define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; X32-LABEL: @cmp_eq6(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
-; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
+; X32-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i32
+; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
+; X32-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP11]]
+; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP12]] to i32
+; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP9]], [[TMP13]]
 ; X32-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
 ; X32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X32-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
@@ -619,19 +619,19 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP7]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i16*
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP10]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP8]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -641,19 +641,19 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq6(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i32
+; X64_2LD-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
+; X64_2LD-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP11]]
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP12]] to i32
+; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP9]], [[TMP13]]
 ; X64_2LD-NEXT: [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
 ; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
@@ -670,17 +670,17 @@
 define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; X32-LABEL: @cmp_eq7(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
-; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 ; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -694,19 +694,19 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32*
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -716,17 +716,17 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq7(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32*
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -743,17 +743,17 @@
 define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
 ; X32-LABEL: @cmp_eq8(
 ; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
-; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
-; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
+; X32-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]]
+; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i32*
+; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]]
+; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP2]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
-; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
+; X32-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X32-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32*
+; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]]
+; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP8]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
 ; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
 ; X32-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -763,10 +763,10 @@
 ;
 ; X64-LABEL: @cmp_eq8(
 ; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
 ; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
 ; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32
@@ -791,17 +791,17 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP8]]
+; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP7]], [[TMP9]]
 ; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -811,17 +811,17 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq9(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
-; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
-; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i64
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i8, i8* [[TMP9]]
+; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i64
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -848,19 +848,19 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP7]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i16*
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP10]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP8]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -870,19 +870,19 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq10(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i16 [[TMP8]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
+; X64_2LD-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP11]]
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP12]] to i64
+; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP9]], [[TMP13]]
 ; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
 ; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
 ; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
@@ -909,19 +909,19 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64*
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -931,17 +931,17 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq11(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64*
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -968,19 +968,19 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32*
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP8]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -990,19 +990,19 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq12(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
-; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
-; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
+; X64_2LD-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
+; X64_2LD-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]]
+; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64
+; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP9]], [[TMP13]]
 ; X64_2LD-NEXT: [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
 ; X64_2LD-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
 ; X64_2LD-NEXT: [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
@@ -1029,19 +1029,19 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 5
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 5
+; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64*
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -1051,17 +1051,17 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq13(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 5
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
-; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
+; X64_2LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 5
+; X64_2LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64*
+; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]]
+; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
 ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
@@ -1088,19 +1088,19 @@
 ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64_1LD: loadbb:
 ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]]
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 6
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
-; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 6
+; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64*
+; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]]
+; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD: endblock:
 ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
@@ -1110,17 +1110,17 @@
 ;
 ; X64_2LD-LABEL: @cmp_eq14(
 ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
-; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
-; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
+; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]]
+; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64*
+; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]]
+; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 6
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_2LD-NEXT:
[[TMP10:%.*]] = load i64, i64* [[TMP7]] -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] +; X64_2LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 6 +; X64_2LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64* +; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]] +; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]] ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 @@ -1147,19 +1147,19 @@ ; X64_1LD-NEXT: br label [[ENDBLOCK:%.*]] ; X64_1LD: loadbb: ; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] -; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +; X64_1LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]] +; X64_1LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]] +; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP2]], [[TMP4]] ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]] ; X64_1LD: loadbb1: ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7 ; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* -; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 7 -; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* -; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] -; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] -; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +; X64_1LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]] +; X64_1LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 7 +; X64_1LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64* +; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]] +; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP8]], [[TMP11]] ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]] ; X64_1LD: endblock: ; X64_1LD-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ] @@ -1169,17 +1169,17 @@ ; ; X64_2LD-LABEL: @cmp_eq15( ; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64* -; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64* -; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] -; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]] -; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]] +; X64_2LD-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]] +; X64_2LD-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i64* +; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]] +; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP2]], [[TMP4]] ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7 ; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64* -; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 7 -; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* -; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]] -; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]] -; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]] +; X64_2LD-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]] +; X64_2LD-NEXT: [[TMP9:%.*]] = getelementptr i8, i8* [[Y]], i64 7 +; X64_2LD-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64* +; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]] +; X64_2LD-NEXT: 
[[TMP12:%.*]] = xor i64 [[TMP8]], [[TMP11]] ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]] ; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0 ; X64_2LD-NEXT: [[TMP15:%.*]] = zext i1 [[TMP14]] to i32 @@ -1202,10 +1202,10 @@ ; ; X64-LABEL: @cmp_eq16( ; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128* -; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128* -; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]] -; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]] -; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]] +; X64-NEXT: [[TMP2:%.*]] = load i128, i128* [[TMP1]] +; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[Y:%.*]] to i128* +; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP3]] +; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP2]], [[TMP4]] ; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 ; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0 ; X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32