diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SizeOpts.h"
 
 using namespace llvm;
@@ -103,8 +104,12 @@
   Value *getMemCmpExpansionZeroCase();
   Value *getMemCmpEqZeroOneBlock();
   Value *getMemCmpOneBlock();
-  Value *getPtrToElementAtOffset(Value *Source, Type *LoadSizeType,
-                                 uint64_t OffsetBytes);
+  struct LoadPair {
+    Value *Lhs = nullptr;
+    Value *Rhs = nullptr;
+  };
+  LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
+                       unsigned OffsetBytes);
 
   static LoadEntryVector
   computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
@@ -261,18 +266,52 @@
                                   EndBlock->getParent(), EndBlock);
 }
 
-/// Return a pointer to an element of type `LoadSizeType` at offset
-/// `OffsetBytes`.
-Value *MemCmpExpansion::getPtrToElementAtOffset(Value *Source,
-                                                Type *LoadSizeType,
-                                                uint64_t OffsetBytes) {
+MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
+                                                       bool NeedsBSwap,
+                                                       Type *CmpSizeType,
+                                                       unsigned OffsetBytes) {
+  // Get the memory source at offset `OffsetBytes`.
+  Value *LhsSource = CI->getArgOperand(0);
+  Value *RhsSource = CI->getArgOperand(1);
   if (OffsetBytes > 0) {
     auto *ByteType = Type::getInt8Ty(CI->getContext());
-    Source = Builder.CreateConstGEP1_64(
-        ByteType, Builder.CreateBitCast(Source, ByteType->getPointerTo()),
+    LhsSource = Builder.CreateConstGEP1_64(
+        ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
+        OffsetBytes);
+    RhsSource = Builder.CreateConstGEP1_64(
+        ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
        OffsetBytes);
   }
-  return Builder.CreateBitCast(Source, LoadSizeType->getPointerTo());
+  LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
+  RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
+
+  // Create a constant or a load from the source.
+  Value *Lhs = nullptr;
+  if (auto *C = dyn_cast<Constant>(LhsSource))
+    Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
+  if (!Lhs)
+    Lhs = Builder.CreateLoad(LoadSizeType, LhsSource);
+
+  Value *Rhs = nullptr;
+  if (auto *C = dyn_cast<Constant>(RhsSource))
+    Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
+  if (!Rhs)
+    Rhs = Builder.CreateLoad(LoadSizeType, RhsSource);
+
+  // Swap bytes if required.
+  if (NeedsBSwap) {
+    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
+                                                Intrinsic::bswap, LoadSizeType);
+    Lhs = Builder.CreateCall(Bswap, Lhs);
+    Rhs = Builder.CreateCall(Bswap, Rhs);
+  }
+
+  // Zero extend if required.
+  if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
+    Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
+    Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
+  }
+  return {Lhs, Rhs};
 }
 
 // This function creates the IR instructions for loading and comparing 1 byte.
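For orientation, a minimal sketch (not part of the patch; value names are illustrative) of the IR shape getLoadPair emits for a hypothetical call getLoadPair(i16, /*NeedsBSwap=*/true, i32, /*OffsetBytes=*/4) on two i8* arguments %x and %y, assuming neither source folds to a constant:

  %lhs.addr = getelementptr i8, i8* %x, i64 4       ; offset each source by 4 bytes
  %rhs.addr = getelementptr i8, i8* %y, i64 4
  %lhs.cast = bitcast i8* %lhs.addr to i16*         ; reinterpret as LoadSizeType*
  %rhs.cast = bitcast i8* %rhs.addr to i16*
  %lhs.val = load i16, i16* %lhs.cast
  %rhs.val = load i16, i16* %rhs.cast
  %lhs.be = call i16 @llvm.bswap.i16(i16 %lhs.val)  ; NeedsBSwap: make unsigned order lexicographic
  %rhs.be = call i16 @llvm.bswap.i16(i16 %rhs.val)
  %lhs = zext i16 %lhs.be to i32                    ; widen to CmpSizeType
  %rhs = zext i16 %rhs.be to i32

If either source is a constant pointer, the corresponding load is instead replaced by the result of ConstantFoldLoadFromConstPtr.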
@@ -282,18 +321,10 @@
 void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
                                                unsigned OffsetBytes) {
   Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
-  Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
-  Value *Source1 =
-      getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType, OffsetBytes);
-  Value *Source2 =
-      getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType, OffsetBytes);
-
-  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
-  LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
-  LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
-  Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
+  const LoadPair Loads =
+      getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
+                  Type::getInt32Ty(CI->getContext()), OffsetBytes);
+  Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
 
   PhiRes->addIncoming(Diff, LoadCmpBlocks[BlockIndex]);
 
@@ -340,41 +371,19 @@
           : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
   for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
     const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
-
-    IntegerType *LoadSizeType =
-        IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
-
-    Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
-                                             CurLoadEntry.Offset);
-    Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
-                                             CurLoadEntry.Offset);
-
-    // Get a constant or load a value for each source address.
-    Value *LoadSrc1 = nullptr;
-    if (auto *Source1C = dyn_cast<Constant>(Source1))
-      LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
-    if (!LoadSrc1)
-      LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-
-    Value *LoadSrc2 = nullptr;
-    if (auto *Source2C = dyn_cast<Constant>(Source2))
-      LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
-    if (!LoadSrc2)
-      LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
+    const LoadPair Loads = getLoadPair(
+        IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
+        /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);
 
     if (NumLoads != 1) {
-      if (LoadSizeType != MaxLoadType) {
-        LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
-        LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
-      }
       // If we have multiple loads per block, we need to generate a composite
       // comparison using xor+or.
-      Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
+      Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
       Diff = Builder.CreateZExt(Diff, MaxLoadType);
       XorList.push_back(Diff);
     } else {
       // If there's only one load per block, we just compare the loaded values.
-      Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
+      Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
     }
   }
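To see why the xor+or form works: a word-wise xor is zero iff the two words are equal, so or-ing the xors of all pairs in a block is zero iff every pair matches, and a single icmp then decides the whole block. A sketch for two i64 load pairs (illustrative names, not from the patch):

  %diff0 = xor i64 %lhs0, %rhs0   ; nonzero iff the first pair differs
  %diff1 = xor i64 %lhs1, %rhs1
  %any = or i64 %diff0, %diff1
  %cmp = icmp ne i64 %any, 0      ; true iff any pair differs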
@@ -451,35 +460,18 @@
 
   Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
 
-  Value *Source1 = getPtrToElementAtOffset(CI->getArgOperand(0), LoadSizeType,
-                                           CurLoadEntry.Offset);
-  Value *Source2 = getPtrToElementAtOffset(CI->getArgOperand(1), LoadSizeType,
-                                           CurLoadEntry.Offset);
-
-  // Load LoadSizeType from the base address.
-  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
-  if (DL.isLittleEndian()) {
-    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
-                                                Intrinsic::bswap, LoadSizeType);
-    LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
-    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
-  }
-
-  if (LoadSizeType != MaxLoadType) {
-    LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
-    LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
-  }
+  const LoadPair Loads =
+      getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
+                  CurLoadEntry.Offset);
 
   // Add the loaded values to the phi nodes for calculating memcmp result only
   // if result is not used in a zero equality.
   if (!IsUsedForZeroCmp) {
-    ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[BlockIndex]);
-    ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[BlockIndex]);
+    ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
+    ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
   }
 
-  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2);
+  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
   BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
                            ? EndBlock
                            : LoadCmpBlocks[BlockIndex + 1];
@@ -568,42 +560,27 @@
 /// the compare, branch, and phi IR that is required in the general case.
 Value *MemCmpExpansion::getMemCmpOneBlock() {
   Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
-  Value *Source1 = CI->getArgOperand(0);
-  Value *Source2 = CI->getArgOperand(1);
-
-  // Cast source to LoadSizeType*.
-  if (Source1->getType() != LoadSizeType)
-    Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
-  if (Source2->getType() != LoadSizeType)
-    Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
-
-  // Load LoadSizeType from the base address.
-  Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
-  Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
-
-  if (DL.isLittleEndian() && Size != 1) {
-    Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
-                                                Intrinsic::bswap, LoadSizeType);
-    LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
-    LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
-  }
+  bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
 
+  // The i8 and i16 cases don't need compares. We zext the loaded values and
+  // subtract them to get the suitable negative, zero, or positive i32 result.
   if (Size < 4) {
-    // The i8 and i16 cases don't need compares. We zext the loaded values and
-    // subtract them to get the suitable negative, zero, or positive i32 result.
-    LoadSrc1 = Builder.CreateZExt(LoadSrc1, Builder.getInt32Ty());
-    LoadSrc2 = Builder.CreateZExt(LoadSrc2, Builder.getInt32Ty());
-    return Builder.CreateSub(LoadSrc1, LoadSrc2);
+    const LoadPair Loads =
+        getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
+                    /*Offset*/ 0);
+    return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
   }
 
+  const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
+                                     /*Offset*/ 0);
   // The result of memcmp is negative, zero, or positive, so produce that by
   // subtracting 2 extended compare bits: sub (ugt, ult).
   // If a target prefers to use selects to get -1/0/1, they should be able
   // to transform this later. The inverse transform (going from selects to math)
   // may not be possible in the DAG because the selects got converted into
   // branches before we got there.
-  Value *CmpUGT = Builder.CreateICmpUGT(LoadSrc1, LoadSrc2);
-  Value *CmpULT = Builder.CreateICmpULT(LoadSrc1, LoadSrc2);
+  Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
+  Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
   Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
   Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
   return Builder.CreateSub(ZextUGT, ZextULT);
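The sub (ugt, ult) trick produces the required sign without branches; because the loaded words were bswap'ed on little-endian targets, unsigned integer order matches lexicographic byte order. A sketch for an i64 comparison (illustrative names):

  %ugt = icmp ugt i64 %lhs, %rhs
  %ult = icmp ult i64 %lhs, %rhs
  %ugt.ext = zext i1 %ugt to i32
  %ult.ext = zext i1 %ult to i32
  %res = sub i32 %ugt.ext, %ult.ext   ; 1 if lhs > rhs, -1 if lhs < rhs, 0 if equal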
@@ -869,6 +846,9 @@
       ++BBIt;
     }
   }
+  if (MadeChanges)
+    for (BasicBlock &BB : F)
+      SimplifyInstructionsInBlock(&BB);
   return MadeChanges ? PreservedAnalyses::none() : PreservedAnalyses::all();
 }
diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -89,32 +89,8 @@
 ; Validate with > 0
 define signext i32 @zeroEqualityTest04() {
 ; CHECK-LABEL: zeroEqualityTest04:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest02.buffer1@toc@ha
-; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest02.buffer2@toc@ha
-; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest02.buffer1@toc@l
-; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l
-; CHECK-NEXT: ldbrx 3, 0, 6
-; CHECK-NEXT: ldbrx 4, 0, 5
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: bne 0, .LBB3_2
-; CHECK-NEXT: # %bb.1: # %loadbb1
-; CHECK-NEXT: li 4, 8
-; CHECK-NEXT: ldbrx 3, 6, 4
-; CHECK-NEXT: ldbrx 4, 5, 4
-; CHECK-NEXT: li 5, 0
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: beq 0, .LBB3_3
-; CHECK-NEXT: .LBB3_2: # %res_block
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: isel 5, 4, 3, 0
-; CHECK-NEXT: .LBB3_3: # %endblock
-; CHECK-NEXT: extsw 3, 5
-; CHECK-NEXT: neg 3, 3
-; CHECK-NEXT: rldicl 3, 3, 1, 63
-; CHECK-NEXT: xori 3, 3, 1
+; CHECK: # %bb.0: # %loadbb
+; CHECK-NEXT: li 3, 0
 ; CHECK-NEXT: blr
 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16)
 %not.cmp = icmp slt i32 %call, 1
@@ -125,30 +101,8 @@
 ; Validate with < 0
 define signext i32 @zeroEqualityTest05() {
 ; CHECK-LABEL: zeroEqualityTest05:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest03.buffer1@toc@ha
-; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest03.buffer2@toc@ha
-; CHECK-NEXT: addi 6, 3, .LzeroEqualityTest03.buffer1@toc@l
-; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l
-; CHECK-NEXT: ldbrx 3, 0, 6
-; CHECK-NEXT: ldbrx 4, 0, 5
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: bne 0, .LBB4_2
-; CHECK-NEXT: # %bb.1: # %loadbb1
-; CHECK-NEXT: li 4, 8
-; CHECK-NEXT: ldbrx 3, 6, 4
-; CHECK-NEXT: ldbrx 4, 5, 4
-; CHECK-NEXT: li 5, 0
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: beq 0, .LBB4_3
-; CHECK-NEXT: .LBB4_2: # %res_block
-; CHECK-NEXT: cmpld 3, 4
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: isel 5, 4, 3, 0
-; CHECK-NEXT: .LBB4_3: # %endblock
-; CHECK-NEXT: nor 3, 5, 5
-; CHECK-NEXT: rlwinm 3, 3, 1, 31, 31
+; CHECK: # %bb.0: # %loadbb
+; CHECK-NEXT: li 3, 0
 ; CHECK-NEXT: blr
 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16)
 %call.lobit = lshr i32 %call, 31
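The two PowerPC functions above compare a pair of constant 16-byte buffers, so getLoadPair folds every load via ConstantFoldLoadFromConstPtr, and the SimplifyInstructionsInBlock sweep added above folds the dependent compares, selects, and single-entry phis. The memcmp result is then known at compile time, and each body collapses to materializing a constant:

  ; conceptually, after folding (a sketch):
  ret i32 0    ; matches the "li 3, 0; blr" in both updated check blocks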
diff --git a/llvm/test/CodeGen/PowerPC/memcmpIR.ll b/llvm/test/CodeGen/PowerPC/memcmpIR.ll
--- a/llvm/test/CodeGen/PowerPC/memcmpIR.ll
+++ b/llvm/test/CodeGen/PowerPC/memcmpIR.ll
@@ -20,8 +20,8 @@
   ; CHECK: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
   ; CHECK-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
   ; CHECK-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
-  ; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
   ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
+  ; CHECK-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
   ; CHECK-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
   ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
   ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
@@ -45,8 +45,8 @@
   ; CHECK-BE: [[BCC1:%[0-9]+]] = bitcast i32* {{.*}} to i8*
   ; CHECK-BE-NEXT: [[BCC2:%[0-9]+]] = bitcast i32* {{.*}} to i8*
   ; CHECK-BE-NEXT: [[GEP1:%[0-9]+]] = getelementptr i8, i8* [[BCC2]], i64 8
-  ; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
   ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i8, i8* [[BCC1]], i64 8
+  ; CHECK-BE-NEXT: [[BCL1:%[0-9]+]] = bitcast i8* [[GEP1]] to i64*
   ; CHECK-BE-NEXT: [[BCL2:%[0-9]+]] = bitcast i8* [[GEP2]] to i64*
   ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[BCL1]]
   ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[BCL2]]
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -230,7 +230,7 @@
 define i32 @length3(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length3:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -239,14 +239,14 @@
 ; X86-NEXT: rolw $8, %dx
 ; X86-NEXT: rolw $8, %si
 ; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB9_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB9_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 2(%eax), %eax
 ; X86-NEXT: movzbl 2(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
-; X86-NEXT: .LBB9_1: # %res_block
+; X86-NEXT: .LBB9_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
@@ -254,19 +254,19 @@
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length3:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %ax
 ; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: cmpw %cx, %ax
-; X64-NEXT: jne .LBB9_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB9_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 2(%rdi), %eax
 ; X64-NEXT: movzbl 2(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB9_1: # %res_block
+; X64-NEXT: .LBB9_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
@@ -445,7 +445,7 @@
 define i32 @length5(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length5:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -454,14 +454,14 @@
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB16_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB16_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 4(%eax), %eax
 ; X86-NEXT: movzbl 4(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
-; X86-NEXT: .LBB16_1: # %res_block
+; X86-NEXT: .LBB16_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
@@ -469,19 +469,19 @@
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length5:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: jne .LBB16_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB16_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 4(%rdi), %eax
 ; X64-NEXT: movzbl 4(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB16_1: # %res_block
+; X64-NEXT: .LBB16_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
@@ -521,7 +521,7 @@
 define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length5_lt:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -530,38 +530,38 @@
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB18_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB18_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 4(%eax), %eax
 ; X86-NEXT: movzbl 4(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB18_3
-; X86-NEXT: .LBB18_1: # %res_block
+; X86-NEXT: jmp .LBB18_2
+; X86-NEXT: .LBB18_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
-; X86-NEXT: .LBB18_3: # %endblock
+; X86-NEXT: .LBB18_2: # %endblock
 ; X86-NEXT: shrl $31, %eax
 ; X86-NEXT: # kill: def $al killed $al killed $eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length5_lt:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: jne .LBB18_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB18_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 4(%rdi), %eax
 ; X64-NEXT: movzbl 4(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: shrl $31, %eax
 ; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB18_1: # %res_block
+; X64-NEXT: .LBB18_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -111,7 +111,7 @@
 define i32 @length3(i8* %X, i8* %Y) nounwind optsize {
 ; X86-LABEL: length3:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -120,34 +120,34 @@
 ; X86-NEXT: rolw $8, %dx
 ; X86-NEXT: rolw $8, %si
 ; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB4_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB4_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 2(%eax), %eax
 ; X86-NEXT: movzbl 2(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB4_3
-; X86-NEXT: .LBB4_1: # %res_block
+; X86-NEXT: jmp .LBB4_2
+; X86-NEXT: .LBB4_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
-; X86-NEXT: .LBB4_3: # %endblock
+; X86-NEXT: .LBB4_2: # %endblock
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length3:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %ax
 ; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: cmpw %cx, %ax
-; X64-NEXT: jne .LBB4_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB4_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 2(%rdi), %eax
 ; X64-NEXT: movzbl 2(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB4_1: # %res_block
+; X64-NEXT: .LBB4_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
@@ -256,7 +256,7 @@
 define i32 @length5(i8* %X, i8* %Y) nounwind optsize {
 ; X86-LABEL: length5:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -265,34 +265,34 @@
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB9_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB9_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 4(%eax), %eax
 ; X86-NEXT: movzbl 4(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB9_3
-; X86-NEXT: .LBB9_1: # %res_block
+; X86-NEXT: jmp .LBB9_2
+; X86-NEXT: .LBB9_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
-; X86-NEXT: .LBB9_3: # %endblock
+; X86-NEXT: .LBB9_2: # %endblock
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length5:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: jne .LBB9_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB9_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 4(%rdi), %eax
 ; X64-NEXT: movzbl 4(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB9_1: # %res_block
+; X64-NEXT: .LBB9_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll
@@ -111,7 +111,7 @@
 define i32 @length3(i8* %X, i8* %Y) nounwind !prof !14 {
 ; X86-LABEL: length3:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -120,34 +120,34 @@
 ; X86-NEXT: rolw $8, %dx
 ; X86-NEXT: rolw $8, %si
 ; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB4_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB4_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 2(%eax), %eax
 ; X86-NEXT: movzbl 2(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB4_3
-; X86-NEXT: .LBB4_1: # %res_block
+; X86-NEXT: jmp .LBB4_2
+; X86-NEXT: .LBB4_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
-; X86-NEXT: .LBB4_3: # %endblock
+; X86-NEXT: .LBB4_2: # %endblock
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length3:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %ax
 ; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: cmpw %cx, %ax
-; X64-NEXT: jne .LBB4_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB4_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 2(%rdi), %eax
 ; X64-NEXT: movzbl 2(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB4_1: # %res_block
+; X64-NEXT: .LBB4_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
@@ -256,7 +256,7 @@
 define i32 @length5(i8* %X, i8* %Y) nounwind !prof !14 {
 ; X86-LABEL: length5:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -265,34 +265,34 @@
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB9_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB9_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 4(%eax), %eax
 ; X86-NEXT: movzbl 4(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB9_3
-; X86-NEXT: .LBB9_1: # %res_block
+; X86-NEXT: jmp .LBB9_2
+; X86-NEXT: .LBB9_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
-; X86-NEXT: .LBB9_3: # %endblock
+; X86-NEXT: .LBB9_2: # %endblock
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length5:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: jne .LBB9_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB9_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 4(%rdi), %eax
 ; X64-NEXT: movzbl 4(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB9_1: # %res_block
+; X64-NEXT: .LBB9_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -98,23 +98,17 @@
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: movzwl .L.str+1, %ecx
 ; X86-NEXT: rolw $8, %ax
-; X86-NEXT: rolw $8, %cx
 ; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length2_const:
 ; X64: # %bb.0:
 ; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
 ; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
 ; X64-NEXT: retq
 %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
 ret i32 %m
@@ -125,12 +119,9 @@
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movzwl (%eax), %eax
-; X86-NEXT: movzwl .L.str+1, %ecx
 ; X86-NEXT: rolw $8, %ax
-; X86-NEXT: rolw $8, %cx
 ; X86-NEXT: movzwl %ax, %eax
-; X86-NEXT: movzwl %cx, %ecx
-; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: addl $-12594, %eax # imm = 0xCECE
 ; X86-NEXT: testl %eax, %eax
 ; X86-NEXT: setg %al
 ; X86-NEXT: retl
 ;
@@ -138,12 +129,9 @@
 ; X64-LABEL: length2_gt_const:
 ; X64: # %bb.0:
 ; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: movzwl .L.str+{{.*}}(%rip), %ecx
 ; X64-NEXT: rolw $8, %ax
-; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: movzwl %ax, %eax
-; X64-NEXT: movzwl %cx, %ecx
-; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: addl $-12594, %eax # imm = 0xCECE
 ; X64-NEXT: testl %eax, %eax
 ; X64-NEXT: setg %al
 ; X64-NEXT: retq
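The new addl immediate is the constant-folded right-hand side: a worked example, assuming @.str begins with the bytes "012" (0x30 0x31 0x32), an assumption consistent with the printed immediate. The i16 load from .L.str+1 then reads bytes 0x31 0x32, i.e. 0x3231 on a little-endian target; bswap yields 0x3132 = 12594; and the subtraction folds into an add of its negation:

  ; eax - 12594  ==  eax + (-12594)
  ; addl $-12594, %eax   # imm = 0xCECE (the low 16 bits of -12594)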
@@ -288,7 +276,7 @@
 define i32 @length3(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length3:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -297,14 +285,14 @@
 ; X86-NEXT: rolw $8, %dx
 ; X86-NEXT: rolw $8, %si
 ; X86-NEXT: cmpw %si, %dx
-; X86-NEXT: jne .LBB11_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB11_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 2(%eax), %eax
 ; X86-NEXT: movzbl 2(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
-; X86-NEXT: .LBB11_1: # %res_block
+; X86-NEXT: .LBB11_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
@@ -312,19 +300,19 @@
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length3:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movzwl (%rdi), %eax
 ; X64-NEXT: movzwl (%rsi), %ecx
 ; X64-NEXT: rolw $8, %ax
 ; X64-NEXT: rolw $8, %cx
 ; X64-NEXT: cmpw %cx, %ax
-; X64-NEXT: jne .LBB11_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB11_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 2(%rdi), %eax
 ; X64-NEXT: movzbl 2(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB11_1: # %res_block
+; X64-NEXT: .LBB11_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
@@ -503,7 +491,7 @@
 define i32 @length5(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length5:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -512,14 +500,14 @@
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB18_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB18_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 4(%eax), %eax
 ; X86-NEXT: movzbl 4(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
-; X86-NEXT: .LBB18_1: # %res_block
+; X86-NEXT: .LBB18_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
@@ -527,19 +515,19 @@
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length5:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: jne .LBB18_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB18_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 4(%rdi), %eax
 ; X64-NEXT: movzbl 4(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB18_1: # %res_block
+; X64-NEXT: .LBB18_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
@@ -579,7 +567,7 @@
 define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length5_lt:
-; X86: # %bb.0: # %loadbb
+; X86: # %bb.0:
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -588,38 +576,38 @@
 ; X86-NEXT: bswapl %edx
 ; X86-NEXT: bswapl %esi
 ; X86-NEXT: cmpl %esi, %edx
-; X86-NEXT: jne .LBB20_1
-; X86-NEXT: # %bb.2: # %loadbb1
+; X86-NEXT: jne .LBB20_3
+; X86-NEXT: # %bb.1: # %loadbb1
 ; X86-NEXT: movzbl 4(%eax), %eax
 ; X86-NEXT: movzbl 4(%ecx), %ecx
 ; X86-NEXT: subl %ecx, %eax
-; X86-NEXT: jmp .LBB20_3
-; X86-NEXT: .LBB20_1: # %res_block
+; X86-NEXT: jmp .LBB20_2
+; X86-NEXT: .LBB20_3: # %res_block
 ; X86-NEXT: setae %al
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: leal -1(%eax,%eax), %eax
-; X86-NEXT: .LBB20_3: # %endblock
+; X86-NEXT: .LBB20_2: # %endblock
 ; X86-NEXT: shrl $31, %eax
 ; X86-NEXT: # kill: def $al killed $al killed $eax
 ; X86-NEXT: popl %esi
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: length5_lt:
-; X64: # %bb.0: # %loadbb
+; X64: # %bb.0:
 ; X64-NEXT: movl (%rdi), %eax
 ; X64-NEXT: movl (%rsi), %ecx
 ; X64-NEXT: bswapl %eax
 ; X64-NEXT: bswapl %ecx
 ; X64-NEXT: cmpl %ecx, %eax
-; X64-NEXT: jne .LBB20_1
-; X64-NEXT: # %bb.2: # %loadbb1
+; X64-NEXT: jne .LBB20_3
+; X64-NEXT: # %bb.1: # %loadbb1
 ; X64-NEXT: movzbl 4(%rdi), %eax
 ; X64-NEXT: movzbl 4(%rsi), %ecx
 ; X64-NEXT: subl %ecx, %eax
 ; X64-NEXT: shrl $31, %eax
 ; X64-NEXT: # kill: def $al killed $al killed $eax
 ; X64-NEXT: retq
-; X64-NEXT: .LBB20_1: # %res_block
+; X64-NEXT: .LBB20_3: # %res_block
 ; X64-NEXT: setae %al
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: leal -1(%rax,%rax), %eax
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -26,9 +26,7 @@
 ; ALL-LABEL: @cmp3(
 ; ALL-NEXT: br label [[LOADBB:%.*]]
 ; ALL: res_block:
-; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i16 [ [[TMP7:%.*]], [[LOADBB]] ]
-; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i16 [ [[TMP8:%.*]], [[LOADBB]] ]
-; ALL-NEXT: [[TMP1:%.*]] = icmp ult i16 [[PHI_SRC1]], [[PHI_SRC2]]
+; ALL-NEXT: [[TMP1:%.*]] = icmp ult i16 [[TMP7:%.*]], [[TMP8:%.*]]
 ; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; ALL-NEXT: br label [[ENDBLOCK:%.*]]
 ; ALL: loadbb:
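The res_block checks in this file shrink because the new SimplifyInstructionsInBlock sweep folds a phi with a single incoming value into that value, so the icmp in res_block now uses the loaded values directly. Schematically, from the cmp3 hunk above (FileCheck names abbreviated):

  ; before:
  %phi.src1 = phi i16 [ %7, %loadbb ]
  %phi.src2 = phi i16 [ %8, %loadbb ]
  %1 = icmp ult i16 %phi.src1, %phi.src2
  ; after:
  %1 = icmp ult i16 %7, %8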
@@ -80,9 +78,7 @@
 ; ALL-LABEL: @cmp5(
 ; ALL-NEXT: br label [[LOADBB:%.*]]
 ; ALL: res_block:
-; ALL-NEXT: [[PHI_SRC1:%.*]] = phi i32 [ [[TMP7:%.*]], [[LOADBB]] ]
-; ALL-NEXT: [[PHI_SRC2:%.*]] = phi i32 [ [[TMP8:%.*]], [[LOADBB]] ]
-; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[PHI_SRC1]], [[PHI_SRC2]]
+; ALL-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP7:%.*]], [[TMP8:%.*]]
 ; ALL-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; ALL-NEXT: br label [[ENDBLOCK:%.*]]
 ; ALL: loadbb:
@@ -131,10 +127,10 @@
 ; ALL-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; ALL: loadbb1:
 ; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; ALL-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
-; ALL-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
-; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
+; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; ALL-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
+; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
+; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
 ; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
 ; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
@@ -179,10 +175,10 @@
 ; X32-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X32: loadbb1:
 ; X32-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X32-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
-; X32-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
-; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
+; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
+; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
+; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
 ; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
 ; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
@@ -218,9 +214,7 @@
 ; X64-LABEL: @cmp9(
 ; X64-NEXT: br label [[LOADBB:%.*]]
 ; X64: res_block:
-; X64-NEXT: [[PHI_SRC1:%.*]] = phi i64 [ [[TMP7:%.*]], [[LOADBB]] ]
-; X64-NEXT: [[PHI_SRC2:%.*]] = phi i64 [ [[TMP8:%.*]], [[LOADBB]] ]
-; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[PHI_SRC1]], [[PHI_SRC2]]
+; X64-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP7:%.*]], [[TMP8:%.*]]
 ; X64-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 1
 ; X64-NEXT: br label [[ENDBLOCK:%.*]]
 ; X64: loadbb:
@@ -273,10 +267,10 @@
 ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i16*
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i16*
-; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP11]]
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
+; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
+; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
 ; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
 ; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
 ; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
@@ -325,10 +319,10 @@
 ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i32*
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i32*
-; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP11]]
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
+; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
+; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
 ; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
 ; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
 ; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
@@ -395,10 +389,10 @@
 ; X64-NEXT: br i1 [[TMP9]], label [[LOADBB1]], label [[RES_BLOCK:%.*]]
 ; X64: loadbb1:
 ; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64*
-; X64-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i64*
-; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP11]]
+; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64*
+; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64*
+; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]]
 ; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
 ; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
 ; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
@@ -598,10 +592,10 @@
 ; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
+; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
 ; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
@@ -626,10 +620,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -646,10 +640,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
@@ -675,10 +669,10 @@
 ; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
-; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
@@ -701,10 +695,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -721,10 +715,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
@@ -748,10 +742,10 @@
 ; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
 ; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
 ; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
-; X32-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X32-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 4
-; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
+; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
+; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
@@ -855,10 +849,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -875,10 +869,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
@@ -916,10 +910,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -936,10 +930,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 3
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
@@ -975,10 +969,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -995,10 +989,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 8
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
 ; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
@@ -1036,10 +1030,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 5
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -1056,10 +1050,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 5
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
@@ -1095,10 +1089,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 6
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -1115,10 +1109,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 6
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
@@ -1154,10 +1148,10 @@
 ; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD: loadbb1:
 ; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
-; X64_1LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_1LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 7
-; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
+; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
@@ -1174,10 +1168,10 @@
 ; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
 ; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
 ; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
-; X64_2LD-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i64*
-; X64_2LD-NEXT: [[TMP8:%.*]] = getelementptr i8, i8* [[Y]], i64 7
-; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64*
-; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP7]]
+; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
+; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
+; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
+; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]