Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -33592,6 +33592,20 @@ DAG.getBitcast(MVT::v4f32, N->getOperand(1)))); } + // Use a 32-bit and+zext if upper bits known zero. + if (VT == MVT::i64 && Subtarget.is64Bit() && + !isa<ConstantSDNode>(N->getOperand(1))) { + APInt HiMask = APInt::getHighBitsSet(64, 32); + if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) || + DAG.MaskedValueIsZero(N->getOperand(0), HiMask)) { + SDLoc dl(N); + SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(0)); + SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(1)); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, + DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS)); + } + } + if (DCI.isBeforeLegalizeOps()) return SDValue(); Index: test/CodeGen/X86/bmi.ll =================================================================== --- test/CodeGen/X86/bmi.ll +++ test/CodeGen/X86/bmi.ll @@ -822,13 +822,13 @@ ret i64 %r } -; The add here gets shrunk, but the and does not thus hiding the blsr pattern. +; The add here used to get shrunk, but the and did not thus hiding the blsr pattern. +; We now use the knowledge that upper bits of the shift guarantee the and result has 0s in the upper bits to reduce it too. 
define i64 @blsr_disguised_shrunk_add(i64 %x) { ; CHECK-LABEL: blsr_disguised_shrunk_add: ; CHECK: # %bb.0: ; CHECK-NEXT: shrq $48, %rdi -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: andq %rdi, %rax +; CHECK-NEXT: blsrl %edi, %eax ; CHECK-NEXT: retq %a = lshr i64 %x, 48 %b = add i64 %a, -1 Index: test/CodeGen/X86/gather-addresses.ll =================================================================== --- test/CodeGen/X86/gather-addresses.ll +++ test/CodeGen/X86/gather-addresses.ll @@ -145,15 +145,15 @@ ; LIN-SSE2-NEXT: movd %xmm1, %esi ; LIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; LIN-SSE2-NEXT: movd %xmm0, %edi -; LIN-SSE2-NEXT: andq %rcx, %rax -; LIN-SSE2-NEXT: andq %rcx, %rdx -; LIN-SSE2-NEXT: andq %rcx, %rsi -; LIN-SSE2-NEXT: andq %rcx, %rdi -; LIN-SSE2-NEXT: movq %rax, %xmm0 -; LIN-SSE2-NEXT: movq %rdx, %xmm1 +; LIN-SSE2-NEXT: andl %ecx, %eax +; LIN-SSE2-NEXT: andl %ecx, %edx +; LIN-SSE2-NEXT: andl %ecx, %esi +; LIN-SSE2-NEXT: andl %ecx, %edi +; LIN-SSE2-NEXT: movd %eax, %xmm0 +; LIN-SSE2-NEXT: movd %edx, %xmm1 ; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; LIN-SSE2-NEXT: movq %rdi, %xmm2 -; LIN-SSE2-NEXT: movq %rsi, %xmm1 +; LIN-SSE2-NEXT: movd %edi, %xmm2 +; LIN-SSE2-NEXT: movd %esi, %xmm1 ; LIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; LIN-SSE2-NEXT: retq ; @@ -165,15 +165,15 @@ ; LIN-SSE4-NEXT: pextrd $1, %xmm0, %edx ; LIN-SSE4-NEXT: pextrd $2, %xmm0, %esi ; LIN-SSE4-NEXT: pextrd $3, %xmm0, %edi -; LIN-SSE4-NEXT: andq %rcx, %rax -; LIN-SSE4-NEXT: andq %rcx, %rdx -; LIN-SSE4-NEXT: andq %rcx, %rsi -; LIN-SSE4-NEXT: andq %rcx, %rdi -; LIN-SSE4-NEXT: movq %rdx, %xmm1 -; LIN-SSE4-NEXT: movq %rax, %xmm0 +; LIN-SSE4-NEXT: andl %ecx, %eax +; LIN-SSE4-NEXT: andl %ecx, %edx +; LIN-SSE4-NEXT: andl %ecx, %esi +; LIN-SSE4-NEXT: andl %ecx, %edi +; LIN-SSE4-NEXT: movd %edx, %xmm1 +; LIN-SSE4-NEXT: movd %eax, %xmm0 ; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; LIN-SSE4-NEXT: movq %rdi, %xmm2 -; LIN-SSE4-NEXT: 
movq %rsi, %xmm1 +; LIN-SSE4-NEXT: movd %edi, %xmm2 +; LIN-SSE4-NEXT: movd %esi, %xmm1 ; LIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; LIN-SSE4-NEXT: retq ; @@ -188,15 +188,15 @@ ; WIN-SSE2-NEXT: movd %xmm1, %r8d ; WIN-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] ; WIN-SSE2-NEXT: movd %xmm0, %edx -; WIN-SSE2-NEXT: andq %r9, %rax -; WIN-SSE2-NEXT: andq %r9, %rcx -; WIN-SSE2-NEXT: andq %r9, %r8 -; WIN-SSE2-NEXT: andq %r9, %rdx -; WIN-SSE2-NEXT: movq %rax, %xmm0 -; WIN-SSE2-NEXT: movq %rcx, %xmm1 +; WIN-SSE2-NEXT: andl %r9d, %eax +; WIN-SSE2-NEXT: andl %r9d, %ecx +; WIN-SSE2-NEXT: andl %r9d, %r8d +; WIN-SSE2-NEXT: andl %r9d, %edx +; WIN-SSE2-NEXT: movd %eax, %xmm0 +; WIN-SSE2-NEXT: movd %ecx, %xmm1 ; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; WIN-SSE2-NEXT: movq %rdx, %xmm2 -; WIN-SSE2-NEXT: movq %r8, %xmm1 +; WIN-SSE2-NEXT: movd %edx, %xmm2 +; WIN-SSE2-NEXT: movd %r8d, %xmm1 ; WIN-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; WIN-SSE2-NEXT: retq ; @@ -208,15 +208,15 @@ ; WIN-SSE4-NEXT: pextrd $1, %xmm0, %ecx ; WIN-SSE4-NEXT: pextrd $2, %xmm0, %r8d ; WIN-SSE4-NEXT: pextrd $3, %xmm0, %edx -; WIN-SSE4-NEXT: andq %r9, %rax -; WIN-SSE4-NEXT: andq %r9, %rcx -; WIN-SSE4-NEXT: andq %r9, %r8 -; WIN-SSE4-NEXT: andq %r9, %rdx -; WIN-SSE4-NEXT: movq %rcx, %xmm1 -; WIN-SSE4-NEXT: movq %rax, %xmm0 +; WIN-SSE4-NEXT: andl %r9d, %eax +; WIN-SSE4-NEXT: andl %r9d, %ecx +; WIN-SSE4-NEXT: andl %r9d, %r8d +; WIN-SSE4-NEXT: andl %r9d, %edx +; WIN-SSE4-NEXT: movd %ecx, %xmm1 +; WIN-SSE4-NEXT: movd %eax, %xmm0 ; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; WIN-SSE4-NEXT: movq %rdx, %xmm2 -; WIN-SSE4-NEXT: movq %r8, %xmm1 +; WIN-SSE4-NEXT: movd %edx, %xmm2 +; WIN-SSE4-NEXT: movd %r8d, %xmm1 ; WIN-SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; WIN-SSE4-NEXT: retq ; Index: test/CodeGen/X86/pr32284.ll =================================================================== --- test/CodeGen/X86/pr32284.ll +++ 
test/CodeGen/X86/pr32284.ll @@ -479,17 +479,16 @@ ; X64-LABEL: f3: ; X64: # %bb.0: # %entry ; X64-NEXT: movl {{.*}}(%rip), %eax -; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF -; X64-NEXT: xorq %rax, %rcx -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: testq %rax, %rax -; X64-NEXT: sete %dl -; X64-NEXT: movl {{.*}}(%rip), %eax -; X64-NEXT: xorl %ecx, %eax -; X64-NEXT: andq %rdx, %rax -; X64-NEXT: orq %rcx, %rax -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; X64-NEXT: movl %ecx, {{.*}}(%rip) +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: testl %eax, %eax +; X64-NEXT: notl %eax +; X64-NEXT: sete %cl +; X64-NEXT: movl {{.*}}(%rip), %edx +; X64-NEXT: xorl %eax, %edx +; X64-NEXT: andl %edx, %ecx +; X64-NEXT: orl %eax, %ecx +; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %eax, {{.*}}(%rip) ; X64-NEXT: retq ; ; 686-O0-LABEL: f3: