Index: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp @@ -1812,7 +1812,7 @@ unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock); Builder.SetInsertPoint(LoadCmpBlocks[Index]); - + Value *Cmp = nullptr; for (unsigned i = 0; i < NumLoads; ++i) { unsigned LoadSize = getLoadSize(RemainingBytes); unsigned GEPIndex = NumBytesProcessed / LoadSize; @@ -1846,9 +1846,16 @@ LoadSrc1 = Builder.CreateZExtOrTrunc(LoadSrc1, MaxLoadType); LoadSrc2 = Builder.CreateZExtOrTrunc(LoadSrc2, MaxLoadType); } - Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); - Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); - XorList.push_back(Diff); + if (NumLoads != 1) { + // If we have multiple loads per block, we need to generate a composite + // comparison using xor+or. + Diff = Builder.CreateXor(LoadSrc1, LoadSrc2); + Diff = Builder.CreateZExtOrTrunc(Diff, MaxLoadType); + XorList.push_back(Diff); + } else { + // If there's only one load per block, we just compare the loaded values. + Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2); + } } auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { @@ -1862,16 +1869,17 @@ return OutList; }; - // Pairwise OR the XOR results. - OrList = pairWiseOr(XorList); - - // Pairwise OR the OR results until one result left. - while (OrList.size() != 1) { - OrList = pairWiseOr(OrList); + if (!Cmp) { + // Pairwise OR the XOR results. + OrList = pairWiseOr(XorList); + + // Pairwise OR the OR results until one result left. + while (OrList.size() != 1) { + OrList = pairWiseOr(OrList); + } + Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); } - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, OrList[0], - ConstantInt::get(Diff->getType(), 0)); BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) ? 
EndBlock : LoadCmpBlocks[Index + 1]; Index: llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ llvm/trunk/test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -21,8 +21,7 @@ ; CHECK-NEXT: lwz 3, 0(3) ; CHECK-NEXT: lwz 4, 0(4) ; CHECK-NEXT: li 5, 1 -; CHECK-NEXT: xor 3, 3, 4 -; CHECK-NEXT: cmplwi 3, 0 +; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: isel 3, 0, 5, 2 ; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr @@ -38,19 +37,19 @@ ; CHECK: # BB#0: # %loadbb ; CHECK-NEXT: ld 5, 0(3) ; CHECK-NEXT: ld 6, 0(4) -; CHECK-NEXT: xor. 5, 5, 6 +; CHECK-NEXT: cmpld 5, 6 ; CHECK-NEXT: bne 0, .LBB1_2 ; CHECK-NEXT: # BB#1: # %loadbb1 ; CHECK-NEXT: ld 3, 8(3) ; CHECK-NEXT: ld 4, 8(4) -; CHECK-NEXT: xor. 3, 3, 4 +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: beq 0, .LBB1_3 ; CHECK-NEXT: .LBB1_2: # %res_block ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB1_3: -; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB1_3: # %endblock ; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 16) @@ -65,27 +64,24 @@ ; CHECK: # BB#0: # %loadbb ; CHECK-NEXT: lwz 5, 0(3) ; CHECK-NEXT: lwz 6, 0(4) -; CHECK-NEXT: xor 5, 5, 6 -; CHECK-NEXT: cmplwi 5, 0 +; CHECK-NEXT: cmpld 5, 6 ; CHECK-NEXT: bne 0, .LBB2_3 ; CHECK-NEXT: # BB#1: # %loadbb1 ; CHECK-NEXT: lhz 5, 4(3) ; CHECK-NEXT: lhz 6, 4(4) -; CHECK-NEXT: xor 5, 5, 6 -; CHECK-NEXT: rlwinm. 5, 5, 0, 16, 31 +; CHECK-NEXT: cmpld 5, 6 ; CHECK-NEXT: bne 0, .LBB2_3 ; CHECK-NEXT: # BB#2: # %loadbb2 ; CHECK-NEXT: lbz 3, 6(3) ; CHECK-NEXT: lbz 4, 6(4) -; CHECK-NEXT: xor 3, 3, 4 -; CHECK-NEXT: rlwinm. 
3, 3, 0, 24, 31 +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: beq 0, .LBB2_4 ; CHECK-NEXT: .LBB2_3: # %res_block ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB2_4: # %endblock ; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr %call = tail call signext i32 @memcmp(i8* %x, i8* %y, i64 7) @@ -178,24 +174,22 @@ ; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha ; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l(3) ; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l(4) -; CHECK-NEXT: xor. 3, 3, 4 +; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: bne 0, .LBB5_2 ; CHECK-NEXT: # BB#1: # %loadbb1 ; CHECK-NEXT: addis 3, 2, .LzeroEqualityTest04.buffer1@toc@ha+8 ; CHECK-NEXT: addis 4, 2, .LzeroEqualityTest04.buffer2@toc@ha+8 ; CHECK-NEXT: ld 3, .LzeroEqualityTest04.buffer1@toc@l+8(3) ; CHECK-NEXT: ld 4, .LzeroEqualityTest04.buffer2@toc@l+8(4) -; CHECK-NEXT: xor. 3, 3, 4 -; CHECK-NEXT: beq 0, .LBB5_4 +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: beq 0, .LBB5_3 ; CHECK-NEXT: .LBB5_2: # %res_block ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: .LBB5_3: # %endblock ; CHECK-NEXT: cntlzw 3, 3 ; CHECK-NEXT: srwi 3, 3, 5 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB5_3 %call = tail call signext i32 @memcmp(i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer1 to i8*), i8* bitcast ([15 x i32]* @zeroEqualityTest04.buffer2 to i8*), i64 16) %not.tobool = icmp eq i32 %call, 0 %cond = zext i1 %not.tobool to i32