Index: lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- lib/CodeGen/CodeGenPrepare.cpp +++ lib/CodeGen/CodeGenPrepare.cpp @@ -1991,16 +1991,13 @@ ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]); } - Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2); - - Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff, - ConstantInt::get(Diff->getType(), 0)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, LoadSrc1, LoadSrc2); BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1)) ? EndBlock : LoadCmpBlocks[Index + 1]; // Early exit branch if difference found to ResultBlock. Otherwise, continue // to next LoadCmpBlock or EndBlock. - BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp); + BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp); Builder.Insert(CmpBr); // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0 Index: test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll =================================================================== --- test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll +++ test/CodeGen/PowerPC/memCmpUsedInZeroEqualityComparison.ll @@ -100,28 +100,26 @@ ; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest02.buffer2@toc@l ; CHECK-NEXT: ldbrx 3, 0, 6 ; CHECK-NEXT: ldbrx 4, 0, 5 -; CHECK-NEXT: subf. 7, 4, 3 +; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: bne 0, .LBB3_2 ; CHECK-NEXT: # BB#1: # %loadbb1 ; CHECK-NEXT: li 4, 8 ; CHECK-NEXT: ldbrx 3, 6, 4 ; CHECK-NEXT: ldbrx 4, 5, 4 -; CHECK-NEXT: subf. 5, 4, 3 -; CHECK-NEXT: beq 0, .LBB3_4 +; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: beq 0, .LBB3_3 ; CHECK-NEXT: .LBB3_2: # %res_block ; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: li 11, 1 ; CHECK-NEXT: li 12, -1 -; CHECK-NEXT: isel 3, 12, 3, 0 +; CHECK-NEXT: isel 5, 12, 11, 0 ; CHECK-NEXT: .LBB3_3: # %endblock -; CHECK-NEXT: cmpwi 3, 1 +; CHECK-NEXT: cmpwi 5, 1 ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: li 4, 1 ; CHECK-NEXT: isel 3, 4, 3, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB3_4: -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB3_3 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest02.buffer2 to i8*), i64 16) %not.cmp = icmp slt i32 %call, 1 %. = zext i1 %not.cmp to i32 @@ -138,27 +136,25 @@ ; CHECK-NEXT: addi 5, 4, .LzeroEqualityTest03.buffer2@toc@l ; CHECK-NEXT: ldbrx 3, 0, 6 ; CHECK-NEXT: ldbrx 4, 0, 5 -; CHECK-NEXT: subf. 7, 4, 3 +; CHECK-NEXT: cmpld 3, 4 ; CHECK-NEXT: bne 0, .LBB4_2 ; CHECK-NEXT: # BB#1: # %loadbb1 ; CHECK-NEXT: li 4, 8 ; CHECK-NEXT: ldbrx 3, 6, 4 ; CHECK-NEXT: ldbrx 4, 5, 4 -; CHECK-NEXT: subf. 5, 4, 3 -; CHECK-NEXT: beq 0, .LBB4_4 +; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: cmpld 3, 4 +; CHECK-NEXT: beq 0, .LBB4_3 ; CHECK-NEXT: .LBB4_2: # %res_block ; CHECK-NEXT: cmpld 3, 4 -; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: li 11, 1 ; CHECK-NEXT: li 12, -1 -; CHECK-NEXT: isel 3, 12, 3, 0 +; CHECK-NEXT: isel 5, 12, 11, 0 ; CHECK-NEXT: .LBB4_3: # %endblock -; CHECK-NEXT: srwi 3, 3, 31 +; CHECK-NEXT: srwi 3, 5, 31 ; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: clrldi 3, 3, 32 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB4_3 %call = tail call signext i32 @memcmp(i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer1 to i8*), i8* bitcast ([4 x i32]* @zeroEqualityTest03.buffer2 to i8*), i64 16) %call.lobit = lshr i32 %call, 31 %call.lobit.not = xor i32 %call.lobit, 1 Index: test/CodeGen/PowerPC/memcmp.ll =================================================================== --- test/CodeGen/PowerPC/memcmp.ll +++ test/CodeGen/PowerPC/memcmp.ll @@ -13,10 +13,9 @@ ; CHECK: ldbrx [[LOAD1:[0-9]+]] ; CHECK-NEXT: ldbrx [[LOAD2:[0-9]+]] ; CHECK-NEXT: li [[LI:[0-9]+]], 1 -; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]] +; CHECK-NEXT: cmpld [[LOAD1]], [[LOAD2]] ; CHECK-NEXT: li [[LI2:[0-9]+]], -1 -; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4 +; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0 ; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2 ; CHECK-NEXT: extsw 3, [[ISEL2]] ; CHECK-NEXT: blr @@ -35,10 +34,9 @@ ; CHECK: lwbrx [[LOAD1:[0-9]+]] ; CHECK-NEXT: lwbrx [[LOAD2:[0-9]+]] ; CHECK-NEXT: li [[LI:[0-9]+]], 1 -; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]] +; CHECK-NEXT: cmpld [[LOAD1]], [[LOAD2]] ; CHECK-NEXT: li [[LI2:[0-9]+]], -1 -; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4 +; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0 ; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2 ; CHECK-NEXT: extsw 3, [[ISEL2]] ; CHECK-NEXT: blr @@ -57,10 +55,9 @@ ; CHECK: lhbrx [[LOAD1:[0-9]+]] ; CHECK-NEXT: lhbrx [[LOAD2:[0-9]+]] ; CHECK-NEXT: li [[LI:[0-9]+]], 1 -; CHECK-NEXT: cmpld [[CMPLD:[0-9]+]], [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: subf. [[SUB:[0-9]+]], [[LOAD2]], [[LOAD1]] +; CHECK-NEXT: cmpld [[LOAD1]], [[LOAD2]] ; CHECK-NEXT: li [[LI2:[0-9]+]], -1 -; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 4 +; CHECK-NEXT: isel [[ISEL:[0-9]+]], [[LI2]], [[LI]], 0 ; CHECK-NEXT: isel [[ISEL2:[0-9]+]], 0, [[ISEL]], 2 ; CHECK-NEXT: extsw 3, [[ISEL2]] ; CHECK-NEXT: blr Index: test/CodeGen/PowerPC/memcmpIR.ll =================================================================== --- test/CodeGen/PowerPC/memcmpIR.ll +++ test/CodeGen/PowerPC/memcmpIR.ll @@ -3,48 +3,47 @@ define signext i32 @test1(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { entry: + ; CHECK-LABEL: @test1( ; CHECK: [[LOAD1:%[0-9]+]] = load i64, i64* ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] + ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block ; CHECK-LABEL: res_block:{{.*}} ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 ; CHECK-NEXT: br label %endblock + ; CHECK-LABEL: loadbb1:{{.*}} ; CHECK: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1 ; CHECK-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]] ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]] ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock - + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] + ; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block + ; CHECK-BE-LABEL: @test1( ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block ; CHECK-BE-LABEL: res_block:{{.*}} ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 ; CHECK-BE-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 ; CHECK-BE-NEXT: br label %endblock + ; CHECK-BE-LABEL: loadbb1:{{.*}} ; CHECK-BE: [[GEP1:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1 ; CHECK-BE-NEXT: [[GEP2:%[0-9]+]] = getelementptr i64, i64* {{.*}}, i64 1 ; CHECK-BE-NEXT: [[LOAD1:%[0-9]+]] = load i64, i64* [[GEP1]] ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* [[GEP2]] - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block %0 = bitcast i32* %buffer1 to i8* %1 = bitcast i32* %buffer2 to i8* @@ -55,28 +54,28 @@ declare signext i32 @memcmp(i8* nocapture, i8* nocapture, i64) local_unnamed_addr #1 define signext i32 @test2(i32* nocapture readonly %buffer1, i32* nocapture readonly %buffer2) { + ; CHECK-LABEL: @test2( ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32* ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64 ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64 - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label %endblock - + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-NEXT: br i1 [[ICMP]], label %endblock, label %res_block + ; CHECK-LABEL: res_block:{{.*}} ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 ; CHECK-NEXT: br label %endblock + ; CHECK-BE-LABEL: @test2( ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i32, i32* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64 ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64 - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label %endblock + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %endblock, label %res_block ; CHECK-BE-LABEL: res_block:{{.*}} ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 @@ -95,35 +94,35 @@ ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD1]]) ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i64 @llvm.bswap.i64(i64 [[LOAD2]]) - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[BSWAP1]], [[BSWAP2]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[BSWAP1]], [[BSWAP2]] + ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block ; CHECK-LABEL: res_block:{{.*}} ; CHECK: [[ICMP2:%[0-9]+]] = icmp ult i64 ; CHECK-NEXT: [[SELECT:%[0-9]+]] = select i1 [[ICMP2]], i32 -1, i32 1 ; CHECK-NEXT: br label %endblock + ; CHECK-LABEL: loadbb1:{{.*}} ; CHECK: [[LOAD1:%[0-9]+]] = load i32, i32* ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD1]]) ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i32 @llvm.bswap.i32(i32 [[LOAD2]]) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[BSWAP1]] to i64 ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[BSWAP2]] to i64 - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block + ; CHECK-LABEL: loadbb2:{{.*}} ; CHECK: [[LOAD1:%[0-9]+]] = load i16, i16* ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* ; CHECK-NEXT: [[BSWAP1:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD1]]) ; CHECK-NEXT: [[BSWAP2:%[0-9]+]] = call i16 @llvm.bswap.i16(i16 [[LOAD2]]) ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[BSWAP1]] to i64 ; CHECK-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[BSWAP2]] to i64 - ; CHECK-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block + ; CHECK-LABEL: loadbb3:{{.*}} ; CHECK: [[LOAD1:%[0-9]+]] = load i8, i8* ; CHECK-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* ; CHECK-NEXT: [[ZEXT1:%[0-9]+]] = zext i8 [[LOAD1]] to i32 @@ -133,9 +132,8 @@ ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i64, i64* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i64, i64* - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[LOAD1]], [[LOAD2]] - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[LOAD1]], [[LOAD2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb1, label %res_block ; CHECK-BE-LABEL: res_block:{{.*}} ; CHECK-BE: [[ICMP2:%[0-9]+]] = icmp ult i64 @@ -146,17 +144,15 @@ ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i32, i32* ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i32 [[LOAD1]] to i64 ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i32 [[LOAD2]] to i64 - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb2, label %res_block ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i16, i16* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i16, i16* ; CHECK-BE-NEXT: [[ZEXT1:%[0-9]+]] = zext i16 [[LOAD1]] to i64 ; CHECK-BE-NEXT: [[ZEXT2:%[0-9]+]] = zext i16 [[LOAD2]] to i64 - ; CHECK-BE-NEXT: [[SUB:%[0-9]+]] = sub i64 [[ZEXT1]], [[ZEXT2]] - ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp ne i64 [[SUB]], 0 - ; CHECK-BE-NEXT: br i1 [[ICMP]], label %res_block, label + ; CHECK-BE-NEXT: [[ICMP:%[0-9]+]] = icmp eq i64 [[ZEXT1]], [[ZEXT2]] + ; CHECK-BE-NEXT: br i1 [[ICMP]], label %loadbb3, label %res_block ; CHECK-BE: [[LOAD1:%[0-9]+]] = load i8, i8* ; CHECK-BE-NEXT: [[LOAD2:%[0-9]+]] = load i8, i8* Index: test/CodeGen/X86/memcmp.ll =================================================================== --- test/CodeGen/X86/memcmp.ll +++ test/CodeGen/X86/memcmp.ll @@ -20,18 +20,16 @@ ; X32-NEXT: rolw $8, %cx ; X32-NEXT: rolw $8, %ax ; X32-NEXT: movzwl %cx, %ecx -; X32-NEXT: movzwl %ax, %eax -; X32-NEXT: cmpl %eax, %ecx -; X32-NEXT: je .LBB0_1 -; X32-NEXT: # BB#2: # %res_block +; X32-NEXT: movzwl %ax, %edx +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: cmpl %edx, %ecx +; X32-NEXT: je .LBB0_3 +; X32-NEXT: # BB#1: # %res_block ; X32-NEXT: movl $-1, %eax -; X32-NEXT: jb .LBB0_4 -; X32-NEXT: # BB#3: # %res_block +; X32-NEXT: jb .LBB0_3 +; X32-NEXT: # BB#2: # %res_block ; X32-NEXT: movl $1, %eax -; X32-NEXT: .LBB0_4: # %endblock -; X32-NEXT: retl -; X32-NEXT: .LBB0_1: -; X32-NEXT: xorl %eax, %eax +; X32-NEXT: .LBB0_3: # %endblock ; X32-NEXT: retl ; ; X64-LABEL: length2: @@ -40,17 +38,16 @@ ; X64-NEXT: movzwl (%rsi), %ecx ; X64-NEXT: rolw $8, %ax ; X64-NEXT: rolw $8, %cx -; X64-NEXT: movzwl %ax, %eax +; X64-NEXT: movzwl %ax, %edx ; X64-NEXT: movzwl %cx, %ecx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB0_1 -; X64-NEXT: # BB#2: # %res_block +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rcx, %rdx +; X64-NEXT: je .LBB0_2 +; X64-NEXT: # BB#1: # %res_block ; X64-NEXT: movl $-1, %ecx ; X64-NEXT: movl $1, %eax ; X64-NEXT: cmovbl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB0_1: -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: .LBB0_2: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 2) nounwind ret i32 %m @@ -176,37 +173,34 @@ ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl (%ecx), %ecx -; X32-NEXT: movl (%eax), %eax +; X32-NEXT: movl (%eax), %edx ; X32-NEXT: bswapl %ecx -; X32-NEXT: bswapl %eax -; X32-NEXT: cmpl %eax, %ecx -; X32-NEXT: je .LBB6_1 -; X32-NEXT: # BB#2: # %res_block +; X32-NEXT: bswapl %edx +; X32-NEXT: xorl %eax, %eax +; X32-NEXT: cmpl %edx, %ecx +; X32-NEXT: je .LBB6_3 +; X32-NEXT: # BB#1: # %res_block ; X32-NEXT: movl $-1, %eax -; X32-NEXT: jb .LBB6_4 -; X32-NEXT: # BB#3: # %res_block +; X32-NEXT: jb .LBB6_3 +; X32-NEXT: # BB#2: # %res_block ; X32-NEXT: movl $1, %eax -; X32-NEXT: .LBB6_4: # %endblock -; X32-NEXT: retl -; X32-NEXT: .LBB6_1: -; X32-NEXT: xorl %eax, %eax +; X32-NEXT: .LBB6_3: # %endblock ; X32-NEXT: retl ; ; X64-LABEL: length4: ; X64: # BB#0: # %loadbb -; X64-NEXT: movl (%rdi), %eax -; X64-NEXT: movl (%rsi), %ecx -; X64-NEXT: bswapl %eax +; X64-NEXT: movl (%rdi), %ecx +; X64-NEXT: movl (%rsi), %edx ; X64-NEXT: bswapl %ecx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB6_1 -; X64-NEXT: # BB#2: # %res_block +; X64-NEXT: bswapl %edx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB6_2 +; X64-NEXT: # BB#1: # %res_block ; X64-NEXT: movl $-1, %ecx ; X64-NEXT: movl $1, %eax ; X64-NEXT: cmovbl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB6_1: -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: .LBB6_2: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 4) nounwind ret i32 %m @@ -310,19 +304,18 @@ ; ; X64-LABEL: length8: ; X64: # BB#0: # %loadbb -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq (%rsi), %rcx -; X64-NEXT: bswapq %rax +; X64-NEXT: movq (%rdi), %rcx +; X64-NEXT: movq (%rsi), %rdx ; X64-NEXT: bswapq %rcx -; X64-NEXT: cmpq %rcx, %rax -; X64-NEXT: je .LBB11_1 -; X64-NEXT: # BB#2: # %res_block +; X64-NEXT: bswapq %rdx +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpq %rdx, %rcx +; X64-NEXT: je .LBB11_2 +; X64-NEXT: # BB#1: # %res_block ; X64-NEXT: movl $-1, %ecx ; X64-NEXT: movl $1, %eax ; X64-NEXT: cmovbl %ecx, %eax -; X64-NEXT: retq -; X64-NEXT: .LBB11_1: -; X64-NEXT: xorl %eax, %eax +; X64-NEXT: .LBB11_2: # %endblock ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 8) nounwind ret i32 %m Index: test/Transforms/CodeGenPrepare/X86/memcmp.ll =================================================================== --- test/Transforms/CodeGenPrepare/X86/memcmp.ll +++ test/Transforms/CodeGenPrepare/X86/memcmp.ll @@ -16,15 +16,14 @@ ; X32-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) ; X32-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i32 ; X32-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32 -; X32-NEXT: [[TMP8:%.*]] = sub i32 [[TMP6]], [[TMP7]] -; X32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -; X32-NEXT: br i1 [[TMP9]], label %res_block, label %endblock +; X32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP6]], [[TMP7]] +; X32-NEXT: br i1 [[TMP8]], label %endblock, label %res_block ; X32: res_block: -; X32-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]] -; X32-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1 +; X32-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]] +; X32-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1 ; X32-NEXT: br label %endblock ; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ] +; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ] ; X32-NEXT: ret i32 [[PHI_RES]] ; ; X64-LABEL: @cmp2( @@ -37,15 +36,14 @@ ; X64-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]]) ; X64-NEXT: [[TMP6:%.*]] = zext i16 [[TMP4]] to i64 ; X64-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i64 -; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0 -; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock +; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]] +; X64-NEXT: br i1 [[TMP8]], label %endblock, label %res_block ; X64: res_block: -; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] -; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1 +; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1 ; X64-NEXT: br label %endblock ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 2) @@ -70,15 +68,14 @@ ; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]] ; X32-NEXT: [[TMP4:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]]) ; X32-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) -; X32-NEXT: [[TMP6:%.*]] = sub i32 [[TMP4]], [[TMP5]] -; X32-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -; X32-NEXT: br i1 [[TMP7]], label %res_block, label %endblock +; X32-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] +; X32-NEXT: br i1 [[TMP6]], label %endblock, label %res_block ; X32: res_block: -; X32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] -; X32-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1 +; X32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] +; X32-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 ; X32-NEXT: br label %endblock ; X32: endblock: -; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ] +; X32-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP8]], %res_block ] ; X32-NEXT: ret i32 [[PHI_RES]] ; ; X64-LABEL: @cmp4( @@ -91,15 +88,14 @@ ; X64-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]]) ; X64-NEXT: [[TMP6:%.*]] = zext i32 [[TMP4]] to i64 ; X64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP5]] to i64 -; X64-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -; X64-NEXT: [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0 -; X64-NEXT: br i1 [[TMP9]], label %res_block, label %endblock +; X64-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP6]], [[TMP7]] +; X64-NEXT: br i1 [[TMP8]], label %endblock, label %res_block ; X64: res_block: -; X64-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] -; X64-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 -1, i32 1 +; X64-NEXT: [[TMP9:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] +; X64-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 -1, i32 1 ; X64-NEXT: br label %endblock ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP11]], %res_block ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP10]], %res_block ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 4) @@ -146,15 +142,14 @@ ; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]] ; X64-NEXT: [[TMP4:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]]) ; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]]) -; X64-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -; X64-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0 -; X64-NEXT: br i1 [[TMP7]], label %res_block, label %endblock +; X64-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], [[TMP5]] +; X64-NEXT: br i1 [[TMP6]], label %endblock, label %res_block ; X64: res_block: -; X64-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] -; X64-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 -1, i32 1 +; X64-NEXT: [[TMP7:%.*]] = icmp ult i64 [[TMP4]], [[TMP5]] +; X64-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 -1, i32 1 ; X64-NEXT: br label %endblock ; X64: endblock: -; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP9]], %res_block ] +; X64-NEXT: [[PHI_RES:%.*]] = phi i32 [ 0, %loadbb ], [ [[TMP8]], %res_block ] ; X64-NEXT: ret i32 [[PHI_RES]] ; %call = tail call i32 @memcmp(i8* %x, i8* %y, i64 8)