diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -71,7 +71,7 @@
   std::vector<BasicBlock *> LoadCmpBlocks;
   BasicBlock *EndBlock;
   PHINode *PhiRes;
-  const bool IsUsedForZeroCmp;
+  const CmpInst::Predicate ZeroCmpKind;
   const DataLayout &DL;
   IRBuilder<> Builder;
   // Represents the decomposition in blocks of the expansion. For example,
@@ -95,13 +95,17 @@
   void setupResultBlockPHINodes();
   void setupEndBlockPHINodes();
   Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
+  Value *getCompareLoadPairsForZeroEquality(unsigned BlockIndex,
+                                            unsigned &LoadIndex);
+  Value *getCompareLoadPairsForZeroRelational(unsigned BlockIndex,
+                                              unsigned &LoadIndex);
   void emitLoadCompareBlock(unsigned BlockIndex);
   void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
                                          unsigned &LoadIndex);
   void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
   void emitMemCmpResultBlock();
-  Value *getMemCmpExpansionZeroCase();
-  Value *getMemCmpEqZeroOneBlock();
+  Value *getMemCmpWithZeroSeveralBlocks();
+  Value *getMemCmpWithZeroOneBlock();
   Value *getMemCmpOneBlock();
   struct LoadPair {
     Value *Lhs = nullptr;
@@ -121,7 +125,8 @@ public:
   MemCmpExpansion(CallInst *CI, uint64_t Size,
                   const TargetTransformInfo::MemCmpExpansionOptions &Options,
-                  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
+                  CmpInst::Predicate ZeroCmpKind,
+                  const DataLayout &TheDataLayout);

   unsigned getNumBlocks();
   uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -210,10 +215,10 @@
 MemCmpExpansion::MemCmpExpansion(
     CallInst *const CI, uint64_t Size,
     const TargetTransformInfo::MemCmpExpansionOptions &Options,
-    const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+    const CmpInst::Predicate ZeroCmpKind, const DataLayout &TheDataLayout)
     : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
       NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
-      IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
+      ZeroCmpKind(ZeroCmpKind), DL(TheDataLayout), Builder(CI) {
   assert(Size > 0 && "zero blocks");
   // Scale the max size down if the target can load more bytes than we need.
   llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -230,7 +235,8 @@
   assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
   // If we allow overlapping loads and the load sequence is not already optimal,
   // use overlapping loads.
-  if (Options.AllowOverlappingLoads &&
+  // FIXME: Allow overlapping for any zero cmp, not just zero equality cmp.
+  if (ICmpInst::isEquality(ZeroCmpKind) && Options.AllowOverlappingLoads &&
      (LoadSequence.empty() || LoadSequence.size() > 2)) {
     unsigned OverlappingNumLoadsNonOneByte = 0;
     auto OverlappingLoads = computeOverlappingLoadSequence(
@@ -246,7 +252,9 @@
 }

 unsigned MemCmpExpansion::getNumBlocks() {
-  if (IsUsedForZeroCmp)
+  // FIXME: Allow multiple loads per block for any zero cmp, not just zero
+  // equality cmp.
+  if (ICmpInst::isEquality(ZeroCmpKind))
     return getNumLoads() / NumLoadsPerBlockForZeroCmp +
            (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
   return getNumLoads();
@@ -342,25 +350,66 @@
   }
 }

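For orientation: the hunks above and below generalize the pass's zero-*equality* special case (the old `IsUsedForZeroCmp` flag) to any single common icmp predicate against zero (`ZeroCmpKind`). A minimal C++ sketch of the caller patterns that now qualify for the zero-comparison expansion (function names are hypothetical):

    #include <cstring>

    bool eq16(const void *a, const void *b) { return memcmp(a, b, 16) == 0; } // ICMP_EQ
    bool ne16(const void *a, const void *b) { return memcmp(a, b, 16) != 0; } // ICMP_NE
    bool lt16(const void *a, const void *b) { return memcmp(a, b, 16) < 0;  } // ICMP_SLT
    bool gt16(const void *a, const void *b) { return memcmp(a, b, 16) > 0;  } // ICMP_SGT

All icmp users of a given call must share one predicate; mixed predicates leave `ZeroCmpKind` at `BAD_ICMP_PREDICATE` and fall back to the generic expansion.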
-/// Generate an equality comparison for one or more pairs of loaded values.
-/// This is used in the case where the memcmp() call is compared equal or not
-/// equal to zero.
+/// Generate an equality/inequality comparison for one or more pairs of loaded
+/// values. This is used in the case where the memcmp() call is compared to
+/// zero.
 Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
                                             unsigned &LoadIndex) {
   assert(LoadIndex < getNumLoads() &&
          "getCompareLoadPairs() called with no remaining loads");

-  std::vector<Value *> XorList, OrList;
-  Value *Diff = nullptr;
-
-  const unsigned NumLoads =
-      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
-
   // For a single-block expansion, start inserting before the memcmp call.
   if (LoadCmpBlocks.empty())
     Builder.SetInsertPoint(CI);
   else
     Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
+
+  switch (ZeroCmpKind) {
+  case CmpInst::ICMP_EQ:
+  case CmpInst::ICMP_NE:
+    return getCompareLoadPairsForZeroEquality(BlockIndex, LoadIndex);
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SGT:
+  case CmpInst::ICMP_SLE:
+  case CmpInst::ICMP_SGE:
+    return getCompareLoadPairsForZeroRelational(BlockIndex, LoadIndex);
+  default:
+    llvm_unreachable("getCompareLoadPairs only works for zero comparison");
+  }
+  return nullptr;
+}
+
+Value *
+MemCmpExpansion::getCompareLoadPairsForZeroRelational(unsigned BlockIndex,
+                                                      unsigned &LoadIndex) {
+  const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
+  Type *LoadType =
+      IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
+
+  const LoadPair Loads =
+      getLoadPair(LoadType, /*NeedsBSwap=*/DL.isLittleEndian() &&
+                                CurLoadEntry.LoadSize != 1,
+                  LoadType, CurLoadEntry.Offset);
+
+  // Relational expansions use a single load per block, so we can compare the
+  // loaded values directly with the user's predicate.
+  Value *Cmp = Builder.CreateICmp(ZeroCmpKind, Loads.Lhs, Loads.Rhs);
+
+  ++LoadIndex;
+  return Cmp;
+}
+
+/// Generate an equality comparison for one or more pairs of loaded values.
+/// This is used in the case where the memcmp() call is compared equal or not
+/// equal to zero.
+Value *
+MemCmpExpansion::getCompareLoadPairsForZeroEquality(unsigned BlockIndex,
+                                                    unsigned &LoadIndex) {
+  std::vector<Value *> XorList, OrList;
+  Value *Diff = nullptr;
+
+  const unsigned NumLoads =
+      std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
+
   Value *Cmp = nullptr;
   // If we have multiple loads per block, we need to generate a composite
   // comparison using xor+or. The type for the combinations is the largest load
@@ -382,7 +431,7 @@
       XorList.push_back(Diff);
     } else {
       // If there's only one load per block, we just compare the loaded values.
-      Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
+      Cmp = Builder.CreateICmp(ZeroCmpKind, Loads.Lhs, Loads.Rhs);
     }
   }
@@ -407,7 +456,8 @@
     }

     assert(Diff && "Failed to find comparison diff");
-    Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
+    Cmp = Builder.CreateICmp(ZeroCmpKind, OrList[0],
+                             ConstantInt::get(Diff->getType(), 0));
   }

   return Cmp;
@@ -464,8 +514,8 @@
                   CurLoadEntry.Offset);

   // Add the loaded values to the phi nodes for calculating memcmp result only
-  // if result is not used in a zero equality.
-  if (!IsUsedForZeroCmp) {
+  // if the result is not used in a zero comparison.
+  if (ZeroCmpKind == CmpInst::BAD_ICMP_PREDICATE) {
     ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
     ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
   }
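For reference, the xor+or combine that getCompareLoadPairsForZeroEquality emits reduces several load pairs to one comparison against zero. A standalone C++ sketch of the same shape for a block with two 8-byte loads (assumes a 64-bit target; the helper name is hypothetical):

    #include <cstdint>
    #include <cstring>

    bool block_eq16(const unsigned char *a, const unsigned char *b) {
      uint64_t a0, a1, b0, b1;
      std::memcpy(&a0, a, 8);      // no bswap needed: equality is
      std::memcpy(&b0, b, 8);      // byte-order independent
      std::memcpy(&a1, a + 8, 8);
      std::memcpy(&b1, b + 8, 8);
      uint64_t d0 = a0 ^ b0;       // xor: nonzero iff the pair differs
      uint64_t d1 = a1 ^ b1;
      return (d0 | d1) == 0;       // or-reduce, then a single icmp against zero
    }

The relational variant cannot combine loads this way, which is why getCompareLoadPairsForZeroRelational handles exactly one load pair per block (see also the FIXME in getNumBlocks).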
@@ -492,9 +542,9 @@
 // memcmp result. It compares the two loaded source values and returns -1 if
 // src1 < src2 and 1 if src1 > src2.
 void MemCmpExpansion::emitMemCmpResultBlock() {
-  // Special case: if memcmp result is used in a zero equality, result does not
-  // need to be calculated and can simply return 1.
-  if (IsUsedForZeroCmp) {
+  // Special case: if the memcmp result is used in a zero comparison, the
+  // result does not need to be calculated and can simply return 1.
+  if (ZeroCmpKind != CmpInst::BAD_ICMP_PREDICATE) {
     BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
     Builder.SetInsertPoint(ResBlock.BB, InsertPt);
     Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
@@ -533,7 +583,7 @@
   PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
 }

-Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
+Value *MemCmpExpansion::getMemCmpWithZeroSeveralBlocks() {
   unsigned LoadIndex = 0;
   // This loop populates each of the LoadCmpBlocks with the IR sequence to
   // handle multiple loads per block.
@@ -548,7 +598,7 @@
 /// A memcmp expansion that compares equality with 0 and only has one block of
 /// load and compare can bypass the compare, branch, and phi IR that is required
 /// in the general case.
-Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
+Value *MemCmpExpansion::getMemCmpWithZeroOneBlock() {
   unsigned LoadIndex = 0;
   Value *Cmp = getCompareLoadPairs(0, LoadIndex);
   assert(LoadIndex == getNumLoads() && "some entries were not consumed");
@@ -599,7 +649,8 @@
   // calculate which source was larger. The calculation requires the
   // two loaded source values of each load compare block.
   // These will be saved in the phi nodes created by setupResultBlockPHINodes.
-  if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
+  if (ZeroCmpKind == CmpInst::BAD_ICMP_PREDICATE)
+    setupResultBlockPHINodes();

   // Create the number of required load compare basic blocks.
   createLoadCmpBlocks();
@@ -611,9 +662,9 @@

   Builder.SetCurrentDebugLocation(CI->getDebugLoc());

-  if (IsUsedForZeroCmp)
-    return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
-                               : getMemCmpExpansionZeroCase();
+  if (ZeroCmpKind != CmpInst::BAD_ICMP_PREDICATE)
+    return getNumBlocks() == 1 ? getMemCmpWithZeroOneBlock()
+                               : getMemCmpWithZeroSeveralBlocks();

   if (getNumBlocks() == 1)
     return getMemCmpOneBlock();
@@ -626,6 +677,39 @@
   return PhiRes;
 }

+// If the memcmp is only used for a given comparison against zero, return that
+// predicate; otherwise return CmpInst::BAD_ICMP_PREDICATE.
+static CmpInst::Predicate isOnlyUsedInZeroComparison(const Instruction *CxtI) {
+  CmpInst::Predicate CommonPred = CmpInst::BAD_ICMP_PREDICATE;
+  for (const User *U : CxtI->users()) {
+    // The user must be an icmp.
+    const ICmpInst *IC = dyn_cast<ICmpInst>(U);
+    if (!IC)
+      return CmpInst::BAD_ICMP_PREDICATE;
+    // The rhs must be zero.
+    Constant *C = dyn_cast<Constant>(IC->getOperand(1));
+    if (C == nullptr || !C->isNullValue())
+      return CmpInst::BAD_ICMP_PREDICATE;
+    // The predicate must match that of the other users.
+    const CmpInst::Predicate Pred = IC->getPredicate();
+    if (CommonPred == CmpInst::BAD_ICMP_PREDICATE)
+      CommonPred = Pred;
+    else if (Pred != CommonPred)
+      return CmpInst::BAD_ICMP_PREDICATE;
+  }
+  return CommonPred;
+}
+
+// Change all user icmp predicates to `!= 0`.
+// Requires isOnlyUsedInZeroComparison(CxtI) to have returned a predicate.
+static void updateZeroComparisonPredicates(Instruction *CxtI) {
+  for (User *U : CxtI->users()) {
+    ICmpInst *IC = dyn_cast<ICmpInst>(U);
+    assert(IC && "isOnlyUsedInZeroComparison must be true");
+    IC->setPredicate(ICmpInst::ICMP_NE);
+  }
+}
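The detection rule above, modeled as self-contained C++ (a sketch for illustration only, not LLVM API):

    #include <optional>
    #include <vector>

    enum class Pred { EQ, NE, SLT, SGT, SLE, SGE };

    // Returns the single predicate shared by all zero-comparison users, or
    // nullopt when predicates disagree (mirrors BAD_ICMP_PREDICATE above).
    std::optional<Pred> commonZeroCmpPred(const std::vector<Pred> &userPreds) {
      std::optional<Pred> common;
      for (Pred p : userPreds) {
        if (!common)
          common = p;          // the first user fixes the predicate
        else if (*common != p)
          return std::nullopt; // mixed predicates: no special expansion
      }
      return common;
    }

The inputs here are assumed to already be icmp-against-zero users; the real code additionally rejects non-icmp users and non-null right-hand sides.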
+
 // This function checks to see if an expansion of memcmp can be generated.
 // It checks for constant compare size that is less than the max inline size.
 // If an expansion cannot occur, returns false to leave as a library call.
@@ -721,11 +805,11 @@
   }

   // TTI call to check if target would like to expand memcmp. Also, get the
   // available load sizes.
-  const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
+  const CmpInst::Predicate ZeroCmpKind = isOnlyUsedInZeroComparison(CI);
   bool OptForSize = CI->getFunction()->hasOptSize() ||
                     llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
-  auto Options = TTI->enableMemCmpExpansion(OptForSize,
-                                            IsUsedForZeroCmp);
+  auto Options =
+      TTI->enableMemCmpExpansion(OptForSize, ICmpInst::isEquality(ZeroCmpKind));
   if (!Options) return false;

   if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
@@ -738,7 +822,7 @@
   if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
     Options.MaxNumLoads = MaxLoadsPerMemcmp;

-  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
+  MemCmpExpansion Expansion(CI, SizeVal, Options, ZeroCmpKind, *DL);

   // Don't expand if this will require more loads than desired by the target.
   if (Expansion.getNumLoads() == 0) {
@@ -751,6 +835,9 @@
   Value *Res = Expansion.getMemCmpExpansion();

   // Replace call with result of expansion and erase call.
+  if (ZeroCmpKind != CmpInst::BAD_ICMP_PREDICATE) {
+    updateZeroComparisonPredicates(CI);
+  }
   CI->replaceAllUsesWith(Res);
   CI->eraseFromParent();

diff --git a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
--- a/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
+++ b/llvm/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll
@@ -163,7 +163,7 @@
 ; CHECK-NEXT:    li 5, 1
 ; CHECK-NEXT:    sldi 5, 5, 32
 ; CHECK-NEXT:    cmpld 4, 5
-; CHECK-NEXT:    bne 0, .LBB6_2
+; CHECK-NEXT:    beq 0, .LBB6_2
 ; CHECK-NEXT:  # %bb.1: # %loadbb1
 ; CHECK-NEXT:    li 4, 3
 ; CHECK-NEXT:    ld 3, 8(3)
@@ -171,12 +171,11 @@
 ; CHECK-NEXT:    ori 4, 4, 2
 ; CHECK-NEXT:    cmpld 3, 4
 ; CHECK-NEXT:    li 3, 0
-; CHECK-NEXT:    beq 0, .LBB6_3
+; CHECK-NEXT:    bne 0, .LBB6_3
 ; CHECK-NEXT:  .LBB6_2: # %res_block
 ; CHECK-NEXT:    li 3, 1
 ; CHECK-NEXT:  .LBB6_3: # %endblock
-; CHECK-NEXT:    cntlzw 3, 3
-; CHECK-NEXT:    srwi 3, 3, 5
+; CHECK-NEXT:    clrldi 3, 3, 32
 ; CHECK-NEXT:    blr
   %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* %X, i64 16)
   %not.tobool = icmp eq i32 %call, 0
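The X86 tests below exercise the new one-block relational lowering: a 2- or 4-byte memcmp whose only use is `< 0` or `> 0` becomes a byte-swapped load pair, one compare, and a setcc. The shape, as a standalone C++ sketch (hypothetical helper, mirroring the checked `cmpw`/`setl` sequence, including its use of a signed compare):

    #include <cstdint>
    #include <cstring>

    bool lt2_shape(const unsigned char *x, const unsigned char *y) {
      uint16_t a, b;
      std::memcpy(&a, x, 2);           // movzwl (%rdi), %eax
      std::memcpy(&b, y, 2);           // movzwl (%rsi), %ecx
      a = __builtin_bswap16(a);        // rolw $8, %ax
      b = __builtin_bswap16(b);        // rolw $8, %cx
      return (int16_t)a < (int16_t)b;  // cmpw %cx, %ax; setl %al
    }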
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -121,14 +121,11 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl (%ecx), %ecx
-; X86-NEXT:    movzwl (%eax), %edx
+; X86-NEXT:    movzwl (%eax), %eax
 ; X86-NEXT:    rolw $8, %cx
-; X86-NEXT:    rolw $8, %dx
-; X86-NEXT:    movzwl %cx, %eax
-; X86-NEXT:    movzwl %dx, %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    rolw $8, %ax
+; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    setl %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: length2_lt:
@@ -137,11 +134,8 @@
 ; X64-NEXT:    movzwl (%rsi), %ecx
 ; X64-NEXT:    rolw $8, %ax
 ; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    subl %ecx, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    cmpw %cx, %ax
+; X64-NEXT:    setl %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
   %c = icmp slt i32 %m, 0
@@ -157,10 +151,7 @@
 ; X86-NEXT:    movzwl (%eax), %eax
 ; X86-NEXT:    rolw $8, %cx
 ; X86-NEXT:    rolw $8, %ax
-; X86-NEXT:    movzwl %cx, %ecx
-; X86-NEXT:    movzwl %ax, %eax
-; X86-NEXT:    subl %eax, %ecx
-; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    cmpw %ax, %cx
 ; X86-NEXT:    setg %al
 ; X86-NEXT:    retl
 ;
@@ -170,10 +161,7 @@
 ; X64-NEXT:    movzwl (%rsi), %ecx
 ; X64-NEXT:    rolw $8, %ax
 ; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    subl %ecx, %eax
-; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    cmpw %cx, %ax
 ; X64-NEXT:    setg %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
@@ -361,29 +349,21 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %ecx
-; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    seta %al
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    cmpl %eax, %ecx
+; X86-NEXT:    setl %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: length4_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl (%rsi), %edx
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl (%rsi), %ecx
+; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    bswapl %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpl %edx, %ecx
-; X64-NEXT:    seta %al
-; X64-NEXT:    sbbl $0, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    cmpl %ecx, %eax
+; X64-NEXT:    setl %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
   %c = icmp slt i32 %m, 0
@@ -399,11 +379,7 @@
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    cmpl %eax, %ecx
-; X86-NEXT:    seta %dl
-; X86-NEXT:    sbbl $0, %edx
-; X86-NEXT:    testl %edx, %edx
 ; X86-NEXT:    setg %al
 ; X86-NEXT:    retl
 ;
@@ -413,11 +389,7 @@
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    cmpl %ecx, %eax
-; X64-NEXT:    seta %dl
-; X64-NEXT:    sbbl $0, %edx
-; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    setg %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
@@ -521,52 +493,47 @@
 define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length5_lt:
-; X86:       # %bb.0: # %loadbb
+; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl (%eax), %edx
-; X86-NEXT:    movl (%ecx), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl (%eax), %esi
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    bswapl %esi
 ; X86-NEXT:    cmpl %esi, %edx
-; X86-NEXT:    jne .LBB18_1
-; X86-NEXT:  # %bb.2: # %loadbb1
-; X86-NEXT:    movzbl 4(%eax), %eax
-; X86-NEXT:    movzbl 4(%ecx), %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    jmp .LBB18_3
-; X86-NEXT:  .LBB18_1: # %res_block
-; X86-NEXT:    setae %al
-; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    leal -1(%eax,%eax), %eax
+; X86-NEXT:    jl .LBB18_2
+; X86-NEXT:  # %bb.1: # %loadbb1
+; X86-NEXT:    movb 4(%ecx), %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    cmpb 4(%eax), %dl
+; X86-NEXT:    jge .LBB18_3
+; X86-NEXT:  .LBB18_2: # %res_block
+; X86-NEXT:    movl $1, %ecx
 ; X86-NEXT:  .LBB18_3: # %endblock
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    setne %al
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: length5_lt:
-; X64:       # %bb.0: # %loadbb
+; X64:       # %bb.0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
 ; X64-NEXT:    cmpl %ecx, %eax
-; X64-NEXT:    jne .LBB18_1
-; X64-NEXT:  # %bb.2: # %loadbb1
-; X64-NEXT:    movzbl 4(%rdi), %eax
-; X64-NEXT:    movzbl 4(%rsi), %ecx
-; X64-NEXT:    subl %ecx, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB18_1: # %res_block
-; X64-NEXT:    setae %al
-; X64-NEXT:    movzbl %al, %eax
-; X64-NEXT:    leal -1(%rax,%rax), %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    jl .LBB18_2
+; X64-NEXT:  # %bb.1: # %loadbb1
+; X64-NEXT:    movb 4(%rdi), %cl
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpb 4(%rsi), %cl
+; X64-NEXT:    jge .LBB18_3
+; X64-NEXT:  .LBB18_2: # %res_block
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:  .LBB18_3: # %endblock
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
   %c = icmp slt i32 %m, 0
@@ -1175,54 +1142,51 @@
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %ecx
-; X86-NEXT:    movl (%eax), %edx
-; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl (%eax), %esi
 ; X86-NEXT:    bswapl %edx
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    jne .LBB33_4
+; X86-NEXT:    bswapl %esi
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    jl .LBB33_4
 ; X86-NEXT:  # %bb.1: # %loadbb1
-; X86-NEXT:    movl 4(%esi), %ecx
-; X86-NEXT:    movl 4(%eax), %edx
-; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl 4(%ecx), %edx
+; X86-NEXT:    movl 4(%eax), %esi
 ; X86-NEXT:    bswapl %edx
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    jne .LBB33_4
+; X86-NEXT:    bswapl %esi
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    jl .LBB33_4
 ; X86-NEXT:  # %bb.2: # %loadbb2
-; X86-NEXT:    movl 8(%esi), %ecx
-; X86-NEXT:    movl 8(%eax), %edx
-; X86-NEXT:    bswapl %ecx
+; X86-NEXT:    movl 8(%ecx), %edx
+; X86-NEXT:    movl 8(%eax), %esi
 ; X86-NEXT:    bswapl %edx
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    jne .LBB33_4
+; X86-NEXT:    bswapl %esi
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    jl .LBB33_4
 ; X86-NEXT:  # %bb.3: # %loadbb3
-; X86-NEXT:    movl 12(%esi), %ecx
+; X86-NEXT:    movl 12(%ecx), %ecx
 ; X86-NEXT:    movl 12(%eax), %edx
 ; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    xorl %eax, %eax
 ; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    je .LBB33_5
+; X86-NEXT:    jge .LBB33_5
 ; X86-NEXT:  .LBB33_4: # %res_block
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    setae %al
-; X86-NEXT:    leal -1(%eax,%eax), %eax
+; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:  .LBB33_5: # %endblock
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: length16_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %rcx
-; X64-NEXT:    movq (%rsi), %rdx
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq (%rsi), %rcx
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    bswapq %rdx
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB33_2
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jl .LBB33_2
 ; X64-NEXT:  # %bb.1: # %loadbb1
 ; X64-NEXT:    movq 8(%rdi), %rcx
 ; X64-NEXT:    movq 8(%rsi), %rdx
@@ -1230,15 +1194,12 @@
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    je .LBB33_3
+; X64-NEXT:    jge .LBB33_3
 ; X64-NEXT:  .LBB33_2: # %res_block
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    setae %al
-; X64-NEXT:    leal -1(%rax,%rax), %eax
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB33_3: # %endblock
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
   %cmp = icmp slt i32 %call, 0
@@ -1249,44 +1210,41 @@
 ; X86-LABEL: length16_gt:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl (%esi), %eax
-; X86-NEXT:    movl (%edx), %ecx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    cmpl %ecx, %eax
-; X86-NEXT:    jne .LBB34_4
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl (%eax), %esi
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    bswapl %esi
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    jg .LBB34_4
 ; X86-NEXT:  # %bb.1: # %loadbb1
-; X86-NEXT:    movl 4(%esi), %eax
-; X86-NEXT:    movl 4(%edx), %ecx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    cmpl %ecx, %eax
-; X86-NEXT:    jne .LBB34_4
+; X86-NEXT:    movl 4(%ecx), %edx
+; X86-NEXT:    movl 4(%eax), %esi
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    bswapl %esi
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    jg .LBB34_4
 ; X86-NEXT:  # %bb.2: # %loadbb2
-; X86-NEXT:    movl 8(%esi), %eax
-; X86-NEXT:    movl 8(%edx), %ecx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    cmpl %ecx, %eax
-; X86-NEXT:    jne .LBB34_4
+; X86-NEXT:    movl 8(%ecx), %edx
+; X86-NEXT:    movl 8(%eax), %esi
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    bswapl %esi
+; X86-NEXT:    cmpl %esi, %edx
+; X86-NEXT:    jg .LBB34_4
 ; X86-NEXT:  # %bb.3: # %loadbb3
-; X86-NEXT:    movl 12(%esi), %eax
-; X86-NEXT:    movl 12(%edx), %ecx
-; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl 12(%ecx), %ecx
+; X86-NEXT:    movl 12(%eax), %edx
 ; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    cmpl %ecx, %eax
-; X86-NEXT:    je .LBB34_5
+; X86-NEXT:    bswapl %edx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    cmpl %edx, %ecx
+; X86-NEXT:    jle .LBB34_5
 ; X86-NEXT:  .LBB34_4: # %res_block
-; X86-NEXT:    xorl %edx, %edx
-; X86-NEXT:    cmpl %ecx, %eax
-; X86-NEXT:    setae %dl
-; X86-NEXT:    leal -1(%edx,%edx), %edx
+; X86-NEXT:    movl $1, %eax
 ; X86-NEXT:  .LBB34_5: # %endblock
-; X86-NEXT:    testl %edx, %edx
-; X86-NEXT:    setg %al
+; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    setne %al
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
@@ -1297,23 +1255,20 @@
 ; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    jne .LBB34_2
+; X64-NEXT:    jg .LBB34_2
 ; X64-NEXT:  # %bb.1: # %loadbb1
-; X64-NEXT:    movq 8(%rdi), %rax
-; X64-NEXT:    movq 8(%rsi), %rcx
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    movq 8(%rdi), %rcx
+; X64-NEXT:    movq 8(%rsi), %rdx
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    je .LBB34_3
+; X64-NEXT:    bswapq %rdx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    jle .LBB34_3
 ; X64-NEXT:  .LBB34_2: # %res_block
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    setae %dl
-; X64-NEXT:    leal -1(%rdx,%rdx), %edx
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB34_3: # %endblock
-; X64-NEXT:    testl %edx, %edx
-; X64-NEXT:    setg %al
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
   %cmp = icmp sgt i32 %call, 0
@@ -1591,19 +1546,19 @@
 ;
 ; X64-LABEL: length24_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %rcx
-; X64-NEXT:    movq (%rsi), %rdx
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq (%rsi), %rcx
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    bswapq %rdx
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB38_3
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jl .LBB38_3
 ; X64-NEXT:  # %bb.1: # %loadbb1
-; X64-NEXT:    movq 8(%rdi), %rcx
-; X64-NEXT:    movq 8(%rsi), %rdx
+; X64-NEXT:    movq 8(%rdi), %rax
+; X64-NEXT:    movq 8(%rsi), %rcx
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    bswapq %rdx
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB38_3
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jl .LBB38_3
 ; X64-NEXT:  # %bb.2: # %loadbb2
 ; X64-NEXT:    movq 16(%rdi), %rcx
 ; X64-NEXT:    movq 16(%rsi), %rdx
@@ -1611,15 +1566,12 @@
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    je .LBB38_4
+; X64-NEXT:    jge .LBB38_4
 ; X64-NEXT:  .LBB38_3: # %res_block
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    setae %al
-; X64-NEXT:    leal -1(%rax,%rax), %eax
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB38_4: # %endblock
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
   %cmp = icmp slt i32 %call, 0
@@ -1646,30 +1598,27 @@
 ; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    jne .LBB39_3
+; X64-NEXT:    jg .LBB39_3
 ; X64-NEXT:  # %bb.1: # %loadbb1
 ; X64-NEXT:    movq 8(%rdi), %rax
 ; X64-NEXT:    movq 8(%rsi), %rcx
 ; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    jne .LBB39_3
+; X64-NEXT:    jg .LBB39_3
 ; X64-NEXT:  # %bb.2: # %loadbb2
-; X64-NEXT:    movq 16(%rdi), %rax
-; X64-NEXT:    movq 16(%rsi), %rcx
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    movq 16(%rdi), %rcx
+; X64-NEXT:    movq 16(%rsi), %rdx
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    je .LBB39_4
+; X64-NEXT:    bswapq %rdx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    jle .LBB39_4
 ; X64-NEXT:  .LBB39_3: # %res_block
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    setae %dl
-; X64-NEXT:    leal -1(%rdx,%rdx), %edx
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB39_4: # %endblock
-; X64-NEXT:    testl %edx, %edx
-; X64-NEXT:    setg %al
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 24) nounwind
   %cmp = icmp sgt i32 %call, 0
@@ -2366,26 +2315,26 @@
 ;
 ; X64-LABEL: length32_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %rcx
-; X64-NEXT:    movq (%rsi), %rdx
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq (%rsi), %rcx
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    bswapq %rdx
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB49_4
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jl .LBB49_4
 ; X64-NEXT:  # %bb.1: # %loadbb1
-; X64-NEXT:    movq 8(%rdi), %rcx
-; X64-NEXT:    movq 8(%rsi), %rdx
+; X64-NEXT:    movq 8(%rdi), %rax
+; X64-NEXT:    movq 8(%rsi), %rcx
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    bswapq %rdx
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB49_4
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jl .LBB49_4
 ; X64-NEXT:  # %bb.2: # %loadbb2
-; X64-NEXT:    movq 16(%rdi), %rcx
-; X64-NEXT:    movq 16(%rsi), %rdx
+; X64-NEXT:    movq 16(%rdi), %rax
+; X64-NEXT:    movq 16(%rsi), %rcx
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    bswapq %rdx
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB49_4
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jl .LBB49_4
 ; X64-NEXT:  # %bb.3: # %loadbb3
 ; X64-NEXT:    movq 24(%rdi), %rcx
 ; X64-NEXT:    movq 24(%rsi), %rdx
@@ -2393,15 +2342,12 @@
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    je .LBB49_5
+; X64-NEXT:    jge .LBB49_5
 ; X64-NEXT:  .LBB49_4: # %res_block
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    setae %al
-; X64-NEXT:    leal -1(%rax,%rax), %eax
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB49_5: # %endblock
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
   %cmp = icmp slt i32 %call, 0
@@ -2428,37 +2374,34 @@
 ; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    jne .LBB50_4
+; X64-NEXT:    jg .LBB50_4
 ; X64-NEXT:  # %bb.1: # %loadbb1
 ; X64-NEXT:    movq 8(%rdi), %rax
 ; X64-NEXT:    movq 8(%rsi), %rcx
 ; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    jne .LBB50_4
+; X64-NEXT:    jg .LBB50_4
 ; X64-NEXT:  # %bb.2: # %loadbb2
 ; X64-NEXT:    movq 16(%rdi), %rax
 ; X64-NEXT:    movq 16(%rsi), %rcx
 ; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    jne .LBB50_4
+; X64-NEXT:    jg .LBB50_4
 ; X64-NEXT:  # %bb.3: # %loadbb3
-; X64-NEXT:    movq 24(%rdi), %rax
-; X64-NEXT:    movq 24(%rsi), %rcx
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    movq 24(%rdi), %rcx
+; X64-NEXT:    movq 24(%rsi), %rdx
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    je .LBB50_5
+; X64-NEXT:    bswapq %rdx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    jle .LBB50_5
 ; X64-NEXT:  .LBB50_4: # %res_block
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    setae %dl
-; X64-NEXT:    leal -1(%rdx,%rdx), %edx
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB50_5: # %endblock
-; X64-NEXT:    testl %edx, %edx
-; X64-NEXT:    setg %al
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 32) nounwind
   %cmp = icmp sgt i32 %call, 0
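The multi-block relational chains above (length16_lt through length32_gt) share one shape: each 8-byte chunk is byte-swapped and tested with the user's predicate, jumping to a shared res_block on success, and the final user compare collapses to `setne`. A standalone C++ sketch of the 16-byte `< 0` variant (hypothetical helper, mirroring the checked asm):

    #include <cstdint>
    #include <cstring>

    bool lt16_shape(const unsigned char *x, const unsigned char *y) {
      uint64_t a, b;
      std::memcpy(&a, x, 8);                 // movq (%rdi) / (%rsi)
      std::memcpy(&b, y, 8);
      if ((int64_t)__builtin_bswap64(a) <
          (int64_t)__builtin_bswap64(b))     // cmpq; jl res_block
        return true;                         // res_block: movl $1, %eax
      std::memcpy(&a, x + 8, 8);             // loadbb1
      std::memcpy(&b, y + 8, 8);
      return (int64_t)__builtin_bswap64(a) <
             (int64_t)__builtin_bswap64(b);  // jge endblock (0), else res_block (1)
    }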
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -120,9 +120,8 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movzwl (%eax), %eax
 ; X86-NEXT:    rolw $8, %ax
-; X86-NEXT:    movzwl %ax, %eax
-; X86-NEXT:    addl $-12594, %eax # imm = 0xCECE
-; X86-NEXT:    testl %eax, %eax
+; X86-NEXT:    cwtl
+; X86-NEXT:    cmpl $12594, %eax # imm = 0x3132
 ; X86-NEXT:    setg %al
 ; X86-NEXT:    retl
 ;
@@ -130,9 +129,8 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    rolw $8, %ax
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    addl $-12594, %eax # imm = 0xCECE
-; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    cwtl
+; X64-NEXT:    cmpl $12594, %eax # imm = 0x3132
 ; X64-NEXT:    setg %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* getelementptr inbounds ([513 x i8], [513 x i8]* @.str, i32 0, i32 1), i64 2) nounwind
@@ -167,14 +165,11 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl (%ecx), %ecx
-; X86-NEXT:    movzwl (%eax), %edx
+; X86-NEXT:    movzwl (%eax), %eax
 ; X86-NEXT:    rolw $8, %cx
-; X86-NEXT:    rolw $8, %dx
-; X86-NEXT:    movzwl %cx, %eax
-; X86-NEXT:    movzwl %dx, %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    rolw $8, %ax
+; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    setl %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: length2_lt:
@@ -183,11 +178,8 @@
 ; X64-NEXT:    movzwl (%rsi), %ecx
 ; X64-NEXT:    rolw $8, %ax
 ; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    subl %ecx, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    cmpw %cx, %ax
+; X64-NEXT:    setl %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
   %c = icmp slt i32 %m, 0
@@ -203,10 +195,7 @@
 ; X86-NEXT:    movzwl (%eax), %eax
 ; X86-NEXT:    rolw $8, %cx
 ; X86-NEXT:    rolw $8, %ax
-; X86-NEXT:    movzwl %cx, %ecx
-; X86-NEXT:    movzwl %ax, %eax
-; X86-NEXT:    subl %eax, %ecx
-; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    cmpw %ax, %cx
 ; X86-NEXT:    setg %al
 ; X86-NEXT:    retl
 ;
@@ -216,10 +205,7 @@
 ; X64-NEXT:    movzwl (%rsi), %ecx
 ; X64-NEXT:    rolw $8, %ax
 ; X64-NEXT:    rolw $8, %cx
-; X64-NEXT:    movzwl %ax, %eax
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    subl %ecx, %eax
-; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    cmpw %cx, %ax
 ; X64-NEXT:    setg %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind
@@ -407,29 +393,21 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %ecx
-; X86-NEXT:    movl (%eax), %edx
+; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpl %edx, %ecx
-; X86-NEXT:    seta %al
-; X86-NEXT:    sbbl $0, %eax
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    cmpl %eax, %ecx
+; X86-NEXT:    setl %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: length4_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl (%rdi), %ecx
-; X64-NEXT:    movl (%rsi), %edx
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    movl (%rsi), %ecx
+; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    bswapl %edx
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpl %edx, %ecx
-; X64-NEXT:    seta %al
-; X64-NEXT:    sbbl $0, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    cmpl %ecx, %eax
+; X64-NEXT:    setl %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
   %c = icmp slt i32 %m, 0
@@ -445,11 +423,7 @@
 ; X86-NEXT:    movl (%eax), %eax
 ; X86-NEXT:    bswapl %ecx
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    xorl %edx, %edx
 ; X86-NEXT:    cmpl %eax, %ecx
-; X86-NEXT:    seta %dl
-; X86-NEXT:    sbbl $0, %edx
-; X86-NEXT:    testl %edx, %edx
 ; X86-NEXT:    setg %al
 ; X86-NEXT:    retl
 ;
@@ -459,11 +433,7 @@
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
-; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    cmpl %ecx, %eax
-; X64-NEXT:    seta %dl
-; X64-NEXT:    sbbl $0, %edx
-; X64-NEXT:    testl %edx, %edx
 ; X64-NEXT:    setg %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind
@@ -567,52 +537,47 @@
 define i1 @length5_lt(i8* %X, i8* %Y) nounwind {
 ; X86-LABEL: length5_lt:
-; X86:       # %bb.0: # %loadbb
+; X86:       # %bb.0:
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl (%eax), %edx
-; X86-NEXT:    movl (%ecx), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl (%eax), %esi
 ; X86-NEXT:    bswapl %edx
 ; X86-NEXT:    bswapl %esi
 ; X86-NEXT:    cmpl %esi, %edx
-; X86-NEXT:    jne .LBB20_1
-; X86-NEXT:  # %bb.2: # %loadbb1
-; X86-NEXT:    movzbl 4(%eax), %eax
-; X86-NEXT:    movzbl 4(%ecx), %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    jmp .LBB20_3
-; X86-NEXT:  .LBB20_1: # %res_block
-; X86-NEXT:    setae %al
-; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    leal -1(%eax,%eax), %eax
+; X86-NEXT:    jl .LBB20_2
+; X86-NEXT:  # %bb.1: # %loadbb1
+; X86-NEXT:    movb 4(%ecx), %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    cmpb 4(%eax), %dl
+; X86-NEXT:    jge .LBB20_3
+; X86-NEXT:  .LBB20_2: # %res_block
+; X86-NEXT:    movl $1, %ecx
 ; X86-NEXT:  .LBB20_3: # %endblock
-; X86-NEXT:    shrl $31, %eax
-; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    testl %ecx, %ecx
+; X86-NEXT:    setne %al
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: length5_lt:
-; X64:       # %bb.0: # %loadbb
+; X64:       # %bb.0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    movl (%rsi), %ecx
 ; X64-NEXT:    bswapl %eax
 ; X64-NEXT:    bswapl %ecx
 ; X64-NEXT:    cmpl %ecx, %eax
-; X64-NEXT:    jne .LBB20_1
-; X64-NEXT:  # %bb.2: # %loadbb1
-; X64-NEXT:    movzbl 4(%rdi), %eax
-; X64-NEXT:    movzbl 4(%rsi), %ecx
-; X64-NEXT:    subl %ecx, %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
-; X64-NEXT:    retq
-; X64-NEXT:  .LBB20_1: # %res_block
-; X64-NEXT:    setae %al
-; X64-NEXT:    movzbl %al, %eax
-; X64-NEXT:    leal -1(%rax,%rax), %eax
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    jl .LBB20_2
+; X64-NEXT:  # %bb.1: # %loadbb1
+; X64-NEXT:    movb 4(%rdi), %cl
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpb 4(%rsi), %cl
+; X64-NEXT:    jge .LBB20_3
+; X64-NEXT:  .LBB20_2: # %res_block
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:  .LBB20_3: # %endblock
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind
   %c = icmp slt i32 %m, 0
@@ -1158,12 +1123,12 @@
 ;
 ; X64-LABEL: length16_lt:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq (%rdi), %rcx
-; X64-NEXT:    movq (%rsi), %rdx
+; X64-NEXT:    movq (%rdi), %rax
+; X64-NEXT:    movq (%rsi), %rcx
+; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    bswapq %rdx
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    jne .LBB37_2
+; X64-NEXT:    cmpq %rcx, %rax
+; X64-NEXT:    jl .LBB37_2
 ; X64-NEXT:  # %bb.1: # %loadbb1
 ; X64-NEXT:    movq 8(%rdi), %rcx
 ; X64-NEXT:    movq 8(%rsi), %rdx
@@ -1171,15 +1136,12 @@
 ; X64-NEXT:    bswapq %rdx
 ; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    je .LBB37_3
+; X64-NEXT:    jge .LBB37_3
 ; X64-NEXT:  .LBB37_2: # %res_block
-; X64-NEXT:    xorl %eax, %eax
-; X64-NEXT:    cmpq %rdx, %rcx
-; X64-NEXT:    setae %al
-; X64-NEXT:    leal -1(%rax,%rax), %eax
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB37_3: # %endblock
-; X64-NEXT:    shrl $31, %eax
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
   %cmp = icmp slt i32 %call, 0
@@ -1206,23 +1168,20 @@
 ; X64-NEXT:    bswapq %rax
 ; X64-NEXT:    bswapq %rcx
 ; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    jne .LBB38_2
+; X64-NEXT:    jg .LBB38_2
 ; X64-NEXT:  # %bb.1: # %loadbb1
-; X64-NEXT:    movq 8(%rdi), %rax
-; X64-NEXT:    movq 8(%rsi), %rcx
-; X64-NEXT:    bswapq %rax
+; X64-NEXT:    movq 8(%rdi), %rcx
+; X64-NEXT:    movq 8(%rsi), %rdx
 ; X64-NEXT:    bswapq %rcx
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    je .LBB38_3
+; X64-NEXT:    bswapq %rdx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    cmpq %rdx, %rcx
+; X64-NEXT:    jle .LBB38_3
 ; X64-NEXT:  .LBB38_2: # %res_block
-; X64-NEXT:    xorl %edx, %edx
-; X64-NEXT:    cmpq %rcx, %rax
-; X64-NEXT:    setae %dl
-; X64-NEXT:    leal -1(%rdx,%rdx), %edx
+; X64-NEXT:    movl $1, %eax
 ; X64-NEXT:  .LBB38_3: # %endblock
-; X64-NEXT:    testl %edx, %edx
-; X64-NEXT:    setg %al
+; X64-NEXT:    testl %eax, %eax
+; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 16) nounwind
   %cmp = icmp sgt i32 %call, 0
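The ExpandMemCmp IR tests below encode the contract at the IR level: the expansion produces a 0-or-1 value whose "1" means "the user's zero-comparison predicate holds", and every user icmp is then rewritten to `!= 0`. As a standalone C++ model (a sketch with hypothetical helper names, not generated code):

    #include <cstdint>
    #include <cstring>

    int expansion_eq2(const unsigned char *x, const unsigned char *y) {
      uint16_t a, b;
      std::memcpy(&a, x, 2);            // load i16 from each pointer
      std::memcpy(&b, y, 2);
      return a == b ? 1 : 0;            // icmp eq + zext i1 to i32
    }

    bool user_after_rewrite(const unsigned char *x, const unsigned char *y) {
      return expansion_eq2(x, y) != 0;  // was `memcmp(x, y, 2) == 0`
    }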
diff --git a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
--- a/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
+++ b/llvm/test/Transforms/ExpandMemCmp/X86/memcmp.ll
@@ -418,9 +418,9 @@
 ; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
 ; ALL-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
 ; ALL-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; ALL-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[TMP3]], [[TMP4]]
 ; ALL-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; ALL-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP6]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
 ;
@@ -445,9 +445,9 @@
 ; X32-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
 ; X32-NEXT:    [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
 ; X32-NEXT:    [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
-; X32-NEXT:    [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
+; X32-NEXT:    [[TMP14:%.*]] = icmp eq i16 [[TMP13]], 0
 ; X32-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X32-NEXT:    ret i32 [[CONV]]
 ;
@@ -460,18 +460,18 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i16, i16* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i16 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp eq i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -489,9 +489,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i16 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i16 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i16 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -507,9 +507,9 @@
 ; ALL-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
 ; ALL-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; ALL-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; ALL-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]]
 ; ALL-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; ALL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; ALL-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP6]], 0
 ; ALL-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; ALL-NEXT:    ret i32 [[CONV]]
 ;
@@ -534,9 +534,9 @@
 ; X32-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
 ; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X32-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
 ; X32-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X32-NEXT:    ret i32 [[CONV]]
 ;
@@ -549,18 +549,18 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp eq i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -578,9 +578,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -607,9 +607,9 @@
 ; X32-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X32-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
 ; X32-NEXT:    [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X32-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X32-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
 ; X32-NEXT:    [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X32-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP17]], 0
 ; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X32-NEXT:    ret i32 [[CONV]]
 ;
@@ -622,7 +622,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
@@ -631,11 +631,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -655,9 +655,9 @@
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = or i32 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT:    [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP17]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -682,9 +682,9 @@
 ; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X32-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
 ; X32-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X32-NEXT:    ret i32 [[CONV]]
 ;
@@ -697,7 +697,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
@@ -706,11 +706,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -728,9 +728,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -755,9 +755,9 @@
 ; X32-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
 ; X32-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
 ; X32-NEXT:    [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
-; X32-NEXT:    [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+; X32-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
 ; X32-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X32-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X32-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X32-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X32-NEXT:    ret i32 [[CONV]]
 ;
@@ -766,9 +766,9 @@
 ; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP6]], 0
 ; X64-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64-NEXT:    ret i32 [[CONV]]
 ;
@@ -794,18 +794,18 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
 ; X64_1LD-NEXT:    [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
 ; X64_1LD-NEXT:    [[TMP8:%.*]] = load i8, i8* [[TMP6]]
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = load i8, i8* [[TMP7]]
-; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
+; X64_1LD-NEXT:    [[TMP10:%.*]] = icmp eq i8 [[TMP8]], [[TMP9]]
 ; X64_1LD-NEXT:    br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -823,9 +823,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -851,7 +851,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
@@ -860,11 +860,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i16 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -884,9 +884,9 @@
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; X64_2LD-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP17]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -912,7 +912,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
@@ -921,11 +921,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -943,9 +943,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -971,7 +971,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
@@ -980,11 +980,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1004,9 +1004,9 @@
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
 ; X64_2LD-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = or i64 [[TMP5]], [[TMP14]]
-; X64_2LD-NEXT:    [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0
+; X64_2LD-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[TMP17:%.*]] = zext i1 [[TMP16]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP17]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP17]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1032,7 +1032,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
@@ -1041,11 +1041,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1063,9 +1063,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1091,7 +1091,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
@@ -1100,11 +1100,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1122,9 +1122,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1150,7 +1150,7 @@
 ; X64_1LD-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
 ; X64_1LD-NEXT:    [[TMP3:%.*]] = load i64, i64* [[TMP1]]
 ; X64_1LD-NEXT:    [[TMP4:%.*]] = load i64, i64* [[TMP2]]
-; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
+; X64_1LD-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[TMP3]], [[TMP4]]
 ; X64_1LD-NEXT:    br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
 ; X64_1LD:       loadbb1:
 ; X64_1LD-NEXT:    [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
@@ -1159,11 +1159,11 @@
 ; X64_1LD-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
 ; X64_1LD-NEXT:    [[TMP10:%.*]] = load i64, i64* [[TMP8]]
 ; X64_1LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
-; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
+; X64_1LD-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[TMP10]], [[TMP11]]
 ; X64_1LD-NEXT:    br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
 ; X64_1LD:       endblock:
 ; X64_1LD-NEXT:    [[PHI_RES:%.*]] = phi i32 [ 0, [[LOADBB1]] ], [ 1, [[RES_BLOCK]] ]
-; X64_1LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[PHI_RES]], 0
+; X64_1LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[PHI_RES]], 0
 ; X64_1LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_1LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1181,9 +1181,9 @@
 ; X64_2LD-NEXT:    [[TMP11:%.*]] = load i64, i64* [[TMP9]]
 ; X64_2LD-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
 ; X64_2LD-NEXT:    [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
-; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; X64_2LD-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 0
 ; X64_2LD-NEXT:    [[TMP15:%.*]] = zext i1 [[TMP14]] to i32
-; X64_2LD-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP15]], 0
+; X64_2LD-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP15]], 0
 ; X64_2LD-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64_2LD-NEXT:    ret i32 [[CONV]]
 ;
@@ -1205,9 +1205,9 @@
 ; X64-NEXT:    [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128*
 ; X64-NEXT:    [[TMP3:%.*]] = load i128, i128* [[TMP1]]
 ; X64-NEXT:    [[TMP4:%.*]] = load i128, i128* [[TMP2]]
-; X64-NEXT:    [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
+; X64-NEXT:    [[TMP5:%.*]] = icmp eq i128 [[TMP3]], [[TMP4]]
 ; X64-NEXT:    [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
-; X64-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
+; X64-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP6]], 0
 ; X64-NEXT:    [[CONV:%.*]] = zext i1 [[CMP]] to i32
 ; X64-NEXT:    ret i32 [[CONV]]
 ;
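Taken together, the X64_1LD patterns above pin down the control-flow shape of the rewritten multi-block zero-comparison expansion. A closing C++ sketch of that shape for the 3-byte eq-zero case, one i16 block plus one i8 block (hypothetical helper; it mirrors the IR structure, not a library API):

    #include <cstdint>
    #include <cstring>

    bool eq3_shape(const unsigned char *x, const unsigned char *y) {
      uint16_t a2, b2;
      std::memcpy(&a2, x, 2);
      std::memcpy(&b2, y, 2);
      int res;
      if (a2 == b2) {            // loadbb: icmp eq, br res_block / loadbb1
        res = 1;                 // res_block: phi incoming 1
      } else if (x[2] == y[2]) { // loadbb1: icmp eq on the trailing byte
        res = 1;
      } else {
        res = 0;                 // endblock: phi incoming 0
      }
      return res != 0;           // rewritten user compare (`!= 0`)
    }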