Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1787,6 +1787,22 @@
     KnownUndef &= SrcUndef;
     break;
   }
+  case ISD::AND: {
+    APInt SrcUndef, SrcZero;
+    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
+                                   SrcZero, TLO, Depth + 1))
+      return true;
+    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
+                                   KnownZero, TLO, Depth + 1))
+      return true;
+
+    // If either side has a zero element, then the result element is zero, even
+    // if the other is an UNDEF.
+    KnownZero |= SrcZero;
+    KnownUndef &= SrcUndef;
+    KnownUndef &= ~KnownZero;
+    break;
+  }
   case ISD::TRUNCATE:
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
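(Illustration, not part of the patch.) The new ISD::AND case above recurses into both operands with the same demanded-elements mask and then merges the per-lane knowledge: a lane known zero on either side is known zero in the result, and that overrides any undef marking. A minimal standalone C++ sketch of that merge rule, using plain uint64_t lane masks in place of APInt and hypothetical names (LaneInfo, combineAnd):

// Standalone sketch (not the LLVM API): bit i of each mask describes vector
// element i; uint64_t stands in for APInt. Mirrors the merge in the new case:
//   KnownZero |= SrcZero; KnownUndef &= SrcUndef; KnownUndef &= ~KnownZero;
#include <cassert>
#include <cstdint>

struct LaneInfo {
  uint64_t KnownUndef; // lanes known to be undef
  uint64_t KnownZero;  // lanes known to be zero
};

// Element-wise knowledge for (lhs & rhs): a zero lane on either side forces a
// zero result lane, even when the other side is undef, so such lanes must not
// remain marked undef.
LaneInfo combineAnd(LaneInfo lhs, LaneInfo rhs) {
  LaneInfo out;
  out.KnownZero = lhs.KnownZero | rhs.KnownZero;    // zero wins on either side
  out.KnownUndef = lhs.KnownUndef & rhs.KnownUndef; // undef only if both undef
  out.KnownUndef &= ~out.KnownZero;                 // known zero beats undef
  return out;
}

int main() {
  // Lane 0: lhs is zero, rhs is undef -> the result lane is zero, not undef.
  LaneInfo lhs{/*KnownUndef=*/0x0, /*KnownZero=*/0x1};
  LaneInfo rhs{/*KnownUndef=*/0x1, /*KnownZero=*/0x0};
  LaneInfo out = combineAnd(lhs, rhs);
  assert(out.KnownZero == 0x1 && out.KnownUndef == 0x0);
  return 0;
}

Because demanded elements now propagate through vector AND, the test updates below can drop or narrow splats/broadcasts of an AND operand when only one lane of the result is actually used.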
Index: llvm/trunk/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
===================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
+++ llvm/trunk/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 ;
 ; Check that a widening truncate to a vector of i1 elements can be handled.
@@ -5,19 +6,14 @@
 define void @pr32275(<4 x i8> %B15) {
 ; CHECK-LABEL: pr32275:
 ; CHECK: # %bb.0: # %BB
-; CHECK: vlgvb %r0, %v24, 3
-; CHECK-NEXT: vlgvb %r1, %v24, 1
-; CHECK-NEXT: vlvgp [[REG1:%v[0-9]]], %r1, %r0
-; CHECK-NEXT: vlgvb %r0, %v24, 0
-; CHECK-NEXT: vlgvb [[REG3:%r[0-9]]], %v24, 2
-; CHECK-NEXT: vrepif [[REG0:%v[0-9]]], 1
-; CHECK: .LBB0_1:
-; CHECK-DAG: vlr [[REG2:%v[0-9]]], [[REG1]]
-; CHECK-DAG: vlvgf [[REG2]], %r0, 0
-; CHECK-NEXT: vlvgf [[REG2]], [[REG3]], 2
-; CHECK-NEXT: vn [[REG2]], [[REG2]], [[REG0]]
-; CHECK-NEXT: vlgvf [[REG4:%r[0-9]]], [[REG2]], 3
-; CHECK-NEXT: cijlh [[REG4]], 0, .LBB0_1
+; CHECK-NEXT: vlgvb %r0, %v24, 3
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vrepif %v1, 1
+; CHECK-NEXT: vn %v0, %v0, %v1
+; CHECK-NEXT: vlgvf %r0, %v0, 3
+; CHECK-NEXT: .LBB0_1: # %CF34
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: cijlh %r0, 0, .LBB0_1
 ; CHECK-NEXT: # %bb.2: # %CF36
 ; CHECK-NEXT: br %r14
 BB:
Index: llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll
+++ llvm/trunk/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -26,7 +26,6 @@
 ; CHECK-LABEL: mag_neg0_double:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: movsd [[SIGNMASK2]](%rip), %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0,0]
 ; CHECK-NEXT: andps %xmm1, %xmm0
 ; CHECK-NEXT: retq
 ;
@@ -92,7 +91,6 @@
 ; CHECK-LABEL: mag_neg0_float:
 ; CHECK: ## %bb.0:
 ; CHECK-NEXT: movss [[SIGNMASK6]](%rip), %xmm1
-; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; CHECK-NEXT: andps %xmm1, %xmm0
 ; CHECK-NEXT: retq
 ;
Index: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
+++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll
@@ -5,13 +5,17 @@
 define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind {
 ; X32-LABEL: knownbits_mask_extract_sext:
 ; X32: # %bb.0:
-; X32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0
+; X32-NEXT: movl $15, %eax
+; X32-NEXT: vmovd %eax, %xmm1
+; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X32-NEXT: vpextrw $0, %xmm0, %eax
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: knownbits_mask_extract_sext:
 ; X64: # %bb.0:
-; X64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; X64-NEXT: movl $15, %eax
+; X64-NEXT: vmovd %eax, %xmm1
+; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vpextrw $0, %xmm0, %eax
 ; X64-NEXT: retq
   %1 = and <8 x i16> %a0,
Index: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
@@ -253,16 +253,16 @@
 ; X32-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
 ; X32: # %bb.0:
 ; X32-NEXT: pushl %eax
-; X32-NEXT: vpsrlq $60, %xmm0, %xmm2
+; X32-NEXT: vpsrlq $60, %xmm0, %xmm1
 ; X32-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [4,0,0,0,8,0,0,0]
-; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $0, {{[0-9]+}}(%esp), %xmm1, %xmm1
+; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X32-NEXT: vmovdqa {{.*#+}} xmm1 = [4,0,0,0,8,0,0,0]
+; X32-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X32-NEXT: vpsubq %xmm1, %xmm0, %xmm0
+; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; X32-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X32-NEXT: vmovd %xmm0, %eax
-; X32-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
+; X32-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
 ; X32-NEXT: vmovss %xmm0, (%esp)
 ; X32-NEXT: flds (%esp)
 ; X32-NEXT: popl %eax
@@ -270,17 +270,17 @@
 ;
 ; X64-LABEL: signbits_ashr_sext_sextinreg_and_extract_sitofp:
 ; X64: # %bb.0:
-; X64-NEXT: vpsrlq $60, %xmm0, %xmm2
+; X64-NEXT: vpsrlq $60, %xmm0, %xmm1
 ; X64-NEXT: vpsrlq $61, %xmm0, %xmm0
-; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X64-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8]
-; X64-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; X64-NEXT: vpsubq %xmm2, %xmm0, %xmm0
+; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8]
+; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0
 ; X64-NEXT: movslq %edi, %rax
-; X64-NEXT: vpinsrq $0, %rax, %xmm1, %xmm1
+; X64-NEXT: vmovq %rax, %xmm1
 ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm3, %xmm0
+; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
 ; X64-NEXT: retq
   %1 = ashr <2 x i64> %a0,
   %2 = sext i32 %a2 to i64
Index: llvm/trunk/test/CodeGen/X86/vector-shift-lshr-sub128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -861,15 +861,13 @@
 ; X32-SSE: # %bb.0:
 ; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
 ; X32-SSE-NEXT: pand %xmm2, %xmm0
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
-; X32-SSE-NEXT: pand %xmm2, %xmm3
+; X32-SSE-NEXT: pand %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psrlq %xmm2, %xmm3
 ; X32-SSE-NEXT: pxor %xmm2, %xmm2
 ; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psrlq %xmm2, %xmm1
-; X32-SSE-NEXT: psrlq %xmm3, %xmm0
-; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: psrlq %xmm2, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
 ; X32-SSE-NEXT: retl
   %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
   %shift = lshr <2 x i32> %a, %splat
Index: llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shift-shl-sub128.ll
@@ -639,15 +639,14 @@
 ;
 ; X32-SSE-LABEL: splatvar_shift_v2i32:
 ; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT: xorps %xmm3, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE-NEXT: psllq %xmm3, %xmm1
+; X32-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
+; X32-SSE-NEXT: pand %xmm1, %xmm2
+; X32-SSE-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE-NEXT: psllq %xmm2, %xmm3
+; X32-SSE-NEXT: pxor %xmm2, %xmm2
+; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
 ; X32-SSE-NEXT: psllq %xmm2, %xmm0
-; X32-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT: movapd %xmm1, %xmm0
+; X32-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
 ; X32-SSE-NEXT: retl
   %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
   %shift = shl <2 x i32> %a, %splat