diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12640,10 +12640,37 @@
   if (SDValue ExtLoad = CombineExtLoad(N))
     return ExtLoad;
 
-  // fold (zext (and/or/xor (load x), cst)) ->
-  //      (and/or/xor (zextload x), (zext cst))
-  // Unless (and (load x) cst) will match as a zextload already and has
-  // additional users.
+  // (zext (xor/and/or (load x), (load y)))
+  //   ->
+  // (xor/and/or (zextload x), (zextload y))
+  // May be possible to extend; currently limited to the simplest cases.
+  if (!VT.isVector() &&
+      (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+       N0.getOpcode() == ISD::XOR)) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    LoadSDNode *L00 = dyn_cast<LoadSDNode>(N00);
+    LoadSDNode *L01 = dyn_cast<LoadSDNode>(N01);
+    auto CanExtendLoad = [&](LoadSDNode *Load, SDValue LoadValue) {
+      return (Load->hasOneUse() &&
+              Load->getExtensionType() == ISD::NON_EXTLOAD &&
+              Load->isUnindexed() && Load->isSimple() &&
+              TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, LoadValue.getValueType()));
+    };
+    if (L00 && L01 && CanExtendLoad(L00, N00) && CanExtendLoad(L01, N01)) {
+      auto GetExtLoad = [&](LoadSDNode *Load, SDValue LoadValue) {
+        return DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LoadValue), VT,
+                              Load->getChain(), Load->getBasePtr(),
+                              LoadValue.getValueType(), Load->getMemOperand());
+      };
+      SDValue ExtL00 = GetExtLoad(L00, N00);
+      SDValue ExtL01 = GetExtLoad(L01, N01);
+      SDValue Op = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtL00, ExtL01);
+      CombineTo(N, Op);
+      return SDValue(N, 0);
+    }
+  }
+
   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) &&
       isa<LoadSDNode>(N0.getOperand(0)) &&
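Not part of the patch itself: a minimal LLVM IR sketch of the pattern the new combine rewrites, i.e. a zero-extend of a bitwise op whose operands are plain one-use loads. Function and value names are illustrative only; the fold fires only when the target reports the matching zextload (here i8 -> i32) as legal.

; Sketch only: zext (xor (load i8 x), (load i8 y))
; becomes     xor (zextload i8->i32 x), (zextload i8->i32 y)
define i32 @zext_xor_of_loads(ptr %p, ptr %q) {
  %x = load i8, ptr %p   ; simple, unindexed, non-extending load with one use
  %y = load i8, ptr %q   ; same constraints apply to the second operand
  %v = xor i8 %x, %y     ; AND and OR are handled the same way
  %z = zext i8 %v to i32 ; after the combine this zext is no longer needed
  ret i32 %z
}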
diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll
--- a/llvm/test/CodeGen/AArch64/bcmp.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp.ll
@@ -46,10 +46,8 @@
 ; CHECK-NEXT:    ldrh w9, [x1]
 ; CHECK-NEXT:    ldrb w10, [x0, #2]
 ; CHECK-NEXT:    ldrb w11, [x1, #2]
-; CHECK-NEXT:    eor w8, w8, w9
-; CHECK-NEXT:    eor w9, w10, w11
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    ccmp w10, w11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 3)
@@ -77,10 +75,8 @@
 ; CHECK-NEXT:    ldr w9, [x1]
 ; CHECK-NEXT:    ldrb w10, [x0, #4]
 ; CHECK-NEXT:    ldrb w11, [x1, #4]
-; CHECK-NEXT:    eor w8, w8, w9
-; CHECK-NEXT:    eor w9, w10, w11
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    ccmp w10, w11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 5)
@@ -95,10 +91,8 @@
 ; CHECK-NEXT:    ldr w9, [x1]
 ; CHECK-NEXT:    ldrh w10, [x0, #4]
 ; CHECK-NEXT:    ldrh w11, [x1, #4]
-; CHECK-NEXT:    eor w8, w8, w9
-; CHECK-NEXT:    eor w9, w10, w11
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    cmp w8, #0
+; CHECK-NEXT:    cmp w8, w9
+; CHECK-NEXT:    ccmp w10, w11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 6)
@@ -138,15 +132,12 @@
 define i1 @bcmp9(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp9:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrb w9, [x0, #8]
-; CHECK-NEXT:    ldrb w10, [x1, #8]
 ; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x11, [x1]
-; CHECK-NEXT:    eor w9, w9, w10
-; CHECK-NEXT:    and x9, x9, #0xff
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    ldr x9, [x1]
+; CHECK-NEXT:    ldrb w10, [x0, #8]
+; CHECK-NEXT:    ldrb w11, [x1, #8]
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 9)
@@ -157,15 +148,12 @@
 define i1 @bcmp10(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp10:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldrh w9, [x0, #8]
-; CHECK-NEXT:    ldrh w10, [x1, #8]
 ; CHECK-NEXT:    ldr x8, [x0]
-; CHECK-NEXT:    ldr x11, [x1]
-; CHECK-NEXT:    eor w9, w9, w10
-; CHECK-NEXT:    and x9, x9, #0xffff
-; CHECK-NEXT:    eor x8, x8, x11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    ldr x9, [x1]
+; CHECK-NEXT:    ldrh w10, [x0, #8]
+; CHECK-NEXT:    ldrh w11, [x1, #8]
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 10)
@@ -196,10 +184,8 @@
 ; CHECK-NEXT:    ldr x9, [x1]
 ; CHECK-NEXT:    ldr w10, [x0, #8]
 ; CHECK-NEXT:    ldr w11, [x1, #8]
-; CHECK-NEXT:    eor x8, x8, x9
-; CHECK-NEXT:    eor w9, w10, w11
-; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    cmp x8, x9
+; CHECK-NEXT:    ccmp x10, x11, #0, eq
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
   %cr = call i32 @bcmp(ptr %a, ptr %b, i64 12)
@@ -278,7 +264,7 @@
 ; CHECK-NEXT:    ldr w13, [x1, #16]
 ; CHECK-NEXT:    eor x8, x8, x10
 ; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    eor w10, w12, w13
+; CHECK-NEXT:    eor x10, x12, x13
 ; CHECK-NEXT:    orr x8, x8, x9
 ; CHECK-NEXT:    orr x8, x8, x10
 ; CHECK-NEXT:    cmp x8, #0
@@ -322,7 +308,7 @@
 ; CHECK-NEXT:    eor x9, x9, x11
 ; CHECK-NEXT:    eor x10, x12, x13
 ; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    eor w11, w14, w15
+; CHECK-NEXT:    eor x11, x14, x15
 ; CHECK-NEXT:    orr x9, x10, x11
 ; CHECK-NEXT:    orr x8, x8, x9
 ; CHECK-NEXT:    cmp x8, #0
@@ -342,14 +328,13 @@
 ; CHECK-NEXT:    ldp x14, x15, [x1, #16]
 ; CHECK-NEXT:    eor x8, x8, x10
 ; CHECK-NEXT:    eor x9, x9, x11
-; CHECK-NEXT:    ldrb w16, [x0, #32]
+; CHECK-NEXT:    ldrb w10, [x0, #32]
 ; CHECK-NEXT:    orr x8, x8, x9
-; CHECK-NEXT:    ldrb w17, [x1, #32]
-; CHECK-NEXT:    eor x10, x12, x14
-; CHECK-NEXT:    eor x11, x13, x15
-; CHECK-NEXT:    eor w12, w16, w17
-; CHECK-NEXT:    orr x9, x10, x11
-; CHECK-NEXT:    and x10, x12, #0xff
+; CHECK-NEXT:    ldrb w11, [x1, #32]
+; CHECK-NEXT:    eor x12, x12, x14
+; CHECK-NEXT:    eor x13, x13, x15
+; CHECK-NEXT:    orr x9, x12, x13
+; CHECK-NEXT:    eor x10, x10, x11
 ; CHECK-NEXT:    orr x8, x8, x9
 ; CHECK-NEXT:    orr x8, x8, x10
 ; CHECK-NEXT:    cmp x8, #0
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll
@@ -178,8 +178,8 @@
 ; X86-NEXT:    movzwl (%ecx), %edx
 ; X86-NEXT:    xorw (%eax), %dx
 ; X86-NEXT:    movzbl 2(%ecx), %ecx
-; X86-NEXT:    xorb 2(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 2(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orw %dx, %ax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -311,8 +311,8 @@
 ; X86-NEXT:    movl (%ecx), %edx
 ; X86-NEXT:    xorl (%eax), %edx
 ; X86-NEXT:    movzbl 4(%ecx), %ecx
-; X86-NEXT:    xorb 4(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 4(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -514,8 +514,8 @@
 ; X86-NEXT:    xorl 4(%eax), %esi
 ; X86-NEXT:    orl %edx, %esi
 ; X86-NEXT:    movzbl 8(%ecx), %ecx
-; X86-NEXT:    xorb 8(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 8(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orl %esi, %eax
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    popl %esi
@@ -537,8 +537,8 @@
 ; X86-NEXT:    xorl 4(%eax), %esi
 ; X86-NEXT:    orl %edx, %esi
 ; X86-NEXT:    movzwl 8(%ecx), %ecx
-; X86-NEXT:    xorw 8(%eax), %cx
-; X86-NEXT:    movzwl %cx, %eax
+; X86-NEXT:    movzwl 8(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orl %esi, %eax
 ; X86-NEXT:    sete %al
 ; X86-NEXT:    popl %esi
@@ -645,8 +645,8 @@
 ; X86-NEXT:    movl 8(%edx), %esi
 ; X86-NEXT:    xorl 8(%ecx), %esi
 ; X86-NEXT:    movzbl 12(%edx), %edx
-; X86-NEXT:    xorb 12(%ecx), %dl
-; X86-NEXT:    movzbl %dl, %ecx
+; X86-NEXT:    movzbl 12(%ecx), %ecx
+; X86-NEXT:    xorl %edx, %ecx
 ; X86-NEXT:    orl %esi, %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    sete %al
@@ -671,8 +671,8 @@
 ; X86-NEXT:    movl 8(%edx), %esi
 ; X86-NEXT:    xorl 8(%ecx), %esi
 ; X86-NEXT:    movzwl 12(%edx), %edx
-; X86-NEXT:    xorw 12(%ecx), %dx
-; X86-NEXT:    movzwl %dx, %ecx
+; X86-NEXT:    movzwl 12(%ecx), %ecx
+; X86-NEXT:    xorl %edx, %ecx
 ; X86-NEXT:    orl %esi, %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    sete %al
diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
--- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
+++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll
@@ -167,9 +167,9 @@
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    xorw (%rsi), %ax
 ; X64-NEXT:    movzbl 2(%rdi), %ecx
-; X64-NEXT:    xorb 2(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orw %ax, %cx
+; X64-NEXT:    movzbl 2(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orw %ax, %dx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
@@ -284,9 +284,9 @@
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    xorl (%rsi), %eax
 ; X64-NEXT:    movzbl 4(%rdi), %ecx
-; X64-NEXT:    xorb 4(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orl %eax, %ecx
+; X64-NEXT:    movzbl 4(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orl %eax, %edx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
@@ -443,9 +443,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movzbl 8(%rdi), %ecx
-; X64-NEXT:    xorb 8(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movzbl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
@@ -459,9 +459,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movzwl 8(%rdi), %ecx
-; X64-NEXT:    xorw 8(%rsi), %cx
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movzwl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
@@ -490,8 +490,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movl 8(%rdi), %ecx
-; X64-NEXT:    xorl 8(%rsi), %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
--- a/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize-x32.ll
@@ -106,9 +106,9 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl (%ecx), %edx
 ; X86-NEXT:    xorw (%eax), %dx
-; X86-NEXT:    movb 2(%ecx), %cl
-; X86-NEXT:    xorb 2(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 2(%ecx), %ecx
+; X86-NEXT:    movzbl 2(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orw %dx, %ax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -197,9 +197,9 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %edx
 ; X86-NEXT:    xorl (%eax), %edx
-; X86-NEXT:    movb 4(%ecx), %cl
-; X86-NEXT:    xorb 4(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 4(%ecx), %ecx
+; X86-NEXT:    movzbl 4(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll
--- a/llvm/test/CodeGen/X86/memcmp-optsize.ll
+++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll
@@ -94,10 +94,10 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    xorw (%rsi), %ax
-; X64-NEXT:    movb 2(%rdi), %cl
-; X64-NEXT:    xorb 2(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orw %ax, %cx
+; X64-NEXT:    movzbl 2(%rdi), %ecx
+; X64-NEXT:    movzbl 2(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orw %ax, %dx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
@@ -173,10 +173,10 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    xorl (%rsi), %eax
-; X64-NEXT:    movb 4(%rdi), %cl
-; X64-NEXT:    xorb 4(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orl %eax, %ecx
+; X64-NEXT:    movzbl 4(%rdi), %ecx
+; X64-NEXT:    movzbl 4(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orl %eax, %edx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
@@ -230,8 +230,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movl 8(%rdi), %ecx
-; X64-NEXT:    xorl 8(%rsi), %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
--- a/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso-x32.ll
@@ -106,9 +106,9 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl (%ecx), %edx
 ; X86-NEXT:    xorw (%eax), %dx
-; X86-NEXT:    movb 2(%ecx), %cl
-; X86-NEXT:    xorb 2(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 2(%ecx), %ecx
+; X86-NEXT:    movzbl 2(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orw %dx, %ax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -197,9 +197,9 @@
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl (%ecx), %edx
 ; X86-NEXT:    xorl (%eax), %edx
-; X86-NEXT:    movb 4(%ecx), %cl
-; X86-NEXT:    xorb 4(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 4(%ecx), %ecx
+; X86-NEXT:    movzbl 4(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/memcmp-pgso.ll b/llvm/test/CodeGen/X86/memcmp-pgso.ll
--- a/llvm/test/CodeGen/X86/memcmp-pgso.ll
+++ b/llvm/test/CodeGen/X86/memcmp-pgso.ll
@@ -94,10 +94,10 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    xorw (%rsi), %ax
-; X64-NEXT:    movb 2(%rdi), %cl
-; X64-NEXT:    xorb 2(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orw %ax, %cx
+; X64-NEXT:    movzbl 2(%rdi), %ecx
+; X64-NEXT:    movzbl 2(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orw %ax, %dx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
@@ -173,10 +173,10 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    xorl (%rsi), %eax
-; X64-NEXT:    movb 4(%rdi), %cl
-; X64-NEXT:    xorb 4(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orl %eax, %ecx
+; X64-NEXT:    movzbl 4(%rdi), %ecx
+; X64-NEXT:    movzbl 4(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orl %eax, %edx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
@@ -230,8 +230,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movl 8(%rdi), %ecx
-; X64-NEXT:    xorl 8(%rsi), %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll
--- a/llvm/test/CodeGen/X86/memcmp-x32.ll
+++ b/llvm/test/CodeGen/X86/memcmp-x32.ll
@@ -206,8 +206,8 @@
 ; X86-NEXT:    movzwl (%ecx), %edx
 ; X86-NEXT:    xorw (%eax), %dx
 ; X86-NEXT:    movzbl 2(%ecx), %ecx
-; X86-NEXT:    xorb 2(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 2(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orw %dx, %ax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
@@ -339,8 +339,8 @@
 ; X86-NEXT:    movl (%ecx), %edx
 ; X86-NEXT:    xorl (%eax), %edx
 ; X86-NEXT:    movzbl 4(%ecx), %ecx
-; X86-NEXT:    xorb 4(%eax), %cl
-; X86-NEXT:    movzbl %cl, %eax
+; X86-NEXT:    movzbl 4(%eax), %eax
+; X86-NEXT:    xorl %ecx, %eax
 ; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    setne %al
 ; X86-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll
--- a/llvm/test/CodeGen/X86/memcmp.ll
+++ b/llvm/test/CodeGen/X86/memcmp.ll
@@ -193,9 +193,9 @@
 ; X64-NEXT:    movzwl (%rdi), %eax
 ; X64-NEXT:    xorw (%rsi), %ax
 ; X64-NEXT:    movzbl 2(%rdi), %ecx
-; X64-NEXT:    xorb 2(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orw %ax, %cx
+; X64-NEXT:    movzbl 2(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orw %ax, %dx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 3) nounwind
@@ -310,9 +310,9 @@
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    xorl (%rsi), %eax
 ; X64-NEXT:    movzbl 4(%rdi), %ecx
-; X64-NEXT:    xorb 4(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orl %eax, %ecx
+; X64-NEXT:    movzbl 4(%rsi), %edx
+; X64-NEXT:    xorl %ecx, %edx
+; X64-NEXT:    orl %eax, %edx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 5) nounwind
@@ -469,9 +469,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movzbl 8(%rdi), %ecx
-; X64-NEXT:    xorb 8(%rsi), %cl
-; X64-NEXT:    movzbl %cl, %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movzbl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 9) nounwind
@@ -485,9 +485,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movzwl 8(%rdi), %ecx
-; X64-NEXT:    xorw 8(%rsi), %cx
-; X64-NEXT:    movzwl %cx, %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movzwl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 10) nounwind
@@ -516,8 +516,9 @@
 ; X64-NEXT:    movq (%rdi), %rax
 ; X64-NEXT:    xorq (%rsi), %rax
 ; X64-NEXT:    movl 8(%rdi), %ecx
-; X64-NEXT:    xorl 8(%rsi), %ecx
-; X64-NEXT:    orq %rax, %rcx
+; X64-NEXT:    movl 8(%rsi), %edx
+; X64-NEXT:    xorq %rcx, %rdx
+; X64-NEXT:    orq %rax, %rdx
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %m = tail call i32 @memcmp(ptr %X, ptr %Y, i64 12) nounwind
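The test updates above all come from memcmp/bcmp expansions of this shape. As a rough, hand-written approximation (not the exact IR ExpandMemCmp emits), a bcmp(a, b, 12) == 0 expansion looks something like the sketch below; with the combine, the trailing narrow xor is performed on zero-extended loads, which is what lets AArch64 select the cmp/ccmp/cset sequence and lets the x86 byte and halfword cases drop the partial-register xorb/xorw seen in the old checks.

; Hand-reduced sketch of a 12-byte equality compare; names and instruction
; ordering are illustrative, not what the middle end actually prints.
define i1 @bcmp12_sketch(ptr %a, ptr %b) {
  %a0 = load i64, ptr %a
  %b0 = load i64, ptr %b
  %x0 = xor i64 %a0, %b0
  %pa = getelementptr i8, ptr %a, i64 8
  %pb = getelementptr i8, ptr %b, i64 8
  %a1 = load i32, ptr %pa
  %b1 = load i32, ptr %pb
  %x1 = xor i32 %a1, %b1        ; feeds a zext: the pattern the new combine targets
  %x1.wide = zext i32 %x1 to i64
  %m = or i64 %x0, %x1.wide
  %eq = icmp eq i64 %m, 0
  ret i1 %eq
}

Running something like llc -mtriple=aarch64 -o - over that sketch should show the cmp/ccmp form from the updated bcmp12 checks, though the exact register assignments may differ.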