Summary of this patch (free text ahead of the first "Index:" line is skipped
by patch tools):

  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    Hunks 1 and 2: the constant checks on N01C and N101C now accept 0xFFFF
    in addition to 0xFF00.
    Hunk 3: adds a "case 0xFFFF" that sets MaskByteOffset = 1 when
    Opc == ISD::SRL or (Opc == ISD::AND && Opc0 == ISD::SHL), and returns
    false otherwise.

  lib/Target/X86/X86ISelLowering.cpp
    The ZeroExtendMask subset test is relaxed from Mask to (Mask | ~Demanded).

  test/CodeGen/X86/{pr12360,pr32284,zext-demanded}.ll
    Updated CHECK lines for f4, the 686 / 686-O0 sequences and test7, plus a
    new test function @PR36689.

NOTE(review): leading whitespace and column alignment of the hunk lines were
reconstructed; if a hunk fails to match, apply with whitespace-insensitive
matching (e.g. "patch -l").

Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4275,7 +4275,10 @@
     if (!N0.getNode()->hasOneUse())
       return SDValue();
     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (!N01C || N01C->getZExtValue() != 0xFF00)
+    // Also handle 0xffff since the LHS is guaranteed to have zeros there.
+    // This is needed for X86.
+    if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
+                  N01C->getZExtValue() != 0xFFFF))
       return SDValue();
     N0 = N0.getOperand(0);
     LookPassAnd0 = true;
@@ -4322,7 +4325,10 @@
     if (!N10.getNode()->hasOneUse())
       return SDValue();
     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
-    if (!N101C || N101C->getZExtValue() != 0xFF00)
+    // Also allow 0xFFFF since the bits will be shifted out. This is needed
+    // for X86.
+    if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
+                   N101C->getZExtValue() != 0xFFFF))
       return SDValue();
     N10 = N10.getOperand(0);
     LookPassAnd1 = true;
@@ -4395,6 +4401,14 @@
   case 0xFF00:     MaskByteOffset = 1; break;
   case 0xFF0000:   MaskByteOffset = 2; break;
   case 0xFF000000: MaskByteOffset = 3; break;
+  case 0xFFFF:
+    // In case demanded bits didn't clear the bits that will be shifted out.
+    // This is needed for X86.
+    if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
+      MaskByteOffset = 1;
+      break;
+    }
+    return false;
   }
 
   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -28343,9 +28343,9 @@
   if (ZeroExtendMask == Mask)
     return true;
 
-  // Make sure the bits in the ZeroExtendMask are also set in the original mask.
-  // TODO: We should be able to set bits that aren't demanded too.
-  if (!ZeroExtendMask.isSubsetOf(Mask))
+  // Make sure the new mask can be represented by a combination of mask bits
+  // and non-demanded bits.
+  if (!ZeroExtendMask.isSubsetOf(Mask | ~Demanded))
     return false;
 
   // Replace the constant with the zero extend mask.
Index: test/CodeGen/X86/pr12360.ll
===================================================================
--- test/CodeGen/X86/pr12360.ll
+++ test/CodeGen/X86/pr12360.ll
@@ -45,9 +45,9 @@
 define zeroext i1 @f4(i32 %x) {
 ; CHECK-LABEL: f4:
 ; CHECK:       ## %bb.0: ## %entry
-; CHECK-NEXT:    shrl $15, %edi
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movzwl %di, %eax
+; CHECK-NEXT:    shrl $15, %eax
+; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
 
 entry:
Index: test/CodeGen/X86/pr32284.ll
===================================================================
--- test/CodeGen/X86/pr32284.ll
+++ test/CodeGen/X86/pr32284.ll
@@ -514,8 +514,6 @@
 ; 686-O0-NEXT:    movl %ecx, %edi
 ; 686-O0-NEXT:    xorl %esi, %edi
 ; 686-O0-NEXT:    andl %edi, %eax
-; 686-O0-NEXT:    movb %al, %dl
-; 686-O0-NEXT:    movzbl %dl, %eax
 ; 686-O0-NEXT:    orl %eax, %ecx
 ; 686-O0-NEXT:    movl %ecx, (%esp)
 ; 686-O0-NEXT:    movl $0, {{[0-9]+}}(%esp)
@@ -545,9 +543,8 @@
 ; 686-NEXT:    movl var_16, %edx
 ; 686-NEXT:    xorl %ecx, %edx
 ; 686-NEXT:    andl %eax, %edx
-; 686-NEXT:    movzbl %dl, %eax
-; 686-NEXT:    orl %ecx, %eax
-; 686-NEXT:    movl %eax, (%esp)
+; 686-NEXT:    orl %ecx, %edx
+; 686-NEXT:    movl %edx, (%esp)
 ; 686-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; 686-NEXT:    movl %ecx, var_46
 ; 686-NEXT:    movl %ebp, %esp
Index: test/CodeGen/X86/zext-demanded.ll
===================================================================
--- test/CodeGen/X86/zext-demanded.ll
+++ test/CodeGen/X86/zext-demanded.ll
@@ -76,9 +76,8 @@
 define i32 @test7(i32 %x) {
 ; CHECK-LABEL: test7:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    andl $65534, %edi # imm = 0xFFFE
-; CHECK-NEXT:    leal 1(%rdi), %eax
+; CHECK-NEXT:    orl $1, %edi
+; CHECK-NEXT:    movzwl %di, %eax
 ; CHECK-NEXT:    retq
   %y = and i32 %x, 65534
   %z = or i32 %y, 1
@@ -138,3 +137,14 @@
   ret i64 %r
 }
 
+define i32 @PR36689(i32*) {
+; CHECK-LABEL: PR36689:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    orl $255, %eax
+; CHECK-NEXT:    retq
+  %2 = load i32, i32* %0
+  %3 = and i32 %2, 65280
+  %4 = or i32 %3, 255
+  ret i32 %4
+}