Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3060,6 +3060,25 @@
       if (Tmp == SrcBits)
         return VTBits;
     }
+
+    // We often bitcast for bitwise ops before bitcasting back to the original
+    // value type, so we can peek through the bitcasts and use the minimum
+    // number of sign bits.
+    if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+         N0.getOpcode() == ISD::XOR) &&
+        N0.getOperand(0).getOpcode() == ISD::BITCAST &&
+        N0.getOperand(1).getOpcode() == ISD::BITCAST) {
+      SDValue N00 = N0.getOperand(0).getOperand(0);
+      SDValue N01 = N0.getOperand(1).getOperand(0);
+      if (N00.getValueType() == VT && N01.getValueType() == VT) {
+        Tmp = ComputeNumSignBits(N00, Depth + 1);
+        if (Tmp == 1)
+          return Tmp;
+        Tmp2 = ComputeNumSignBits(N01, Depth + 1);
+        FirstAnswer = std::min(Tmp, Tmp2);
+      }
+    }
+
     break;
   }
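Note for the test updates that follow: both compare results feeding the logic op are lane-wide sign splats, so taking the minimum sign-bit count of the two inputs, as the code above now does, proves the combined mask is still a sign splat. That is what allows the X86 backend to truncate it with a signed pack (packsswb/vpacksswb) instead of the pshufb or pand+packuswb sequences removed from the CHECK lines below. A rough IR sketch of the kind of function involved; this is illustrative only, and the function name and body are not taken from the test files (their IR is outside the context shown in this diff):

; Illustrative sketch: two vector compares produce <8 x i1> masks, the masks
; are and'ed, and the result is read back as a scalar bitmask. On X86 each
; compare lane is all-ones or all-zeros, i.e. a sign splat, so the and'ed
; value keeps a full set of sign bits.
define i8 @sign_mask_and_sketch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
  %cmp0 = icmp sgt <8 x i16> %a, %b
  %cmp1 = icmp sgt <8 x i16> %c, %d
  %mask = and <8 x i1> %cmp0, %cmp1
  %bits = bitcast <8 x i1> %mask to i8
  ret i8 %bits
}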
Index: test/CodeGen/X86/bitcast-and-setcc-128.ll
===================================================================
--- test/CodeGen/X86/bitcast-and-setcc-128.ll
+++ test/CodeGen/X86/bitcast-and-setcc-128.ll
@@ -6,33 +6,22 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512
 
 define i8 @v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
-; SSE2-LABEL: v8i16:
-; SSE2: # BB#0:
-; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-NEXT: pand %xmm0, %xmm2
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm2
-; SSE2-NEXT: packuswb %xmm2, %xmm2
-; SSE2-NEXT: pmovmskb %xmm2, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSSE3-LABEL: v8i16:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSSE3-NEXT: pand %xmm0, %xmm2
-; SSSE3-NEXT: pshufb {{.*#+}} xmm2 = xmm2[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm2, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: ret{{[l|q]}}
+; SSE2-SSSE3-LABEL: v8i16:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm2
+; SSE2-SSSE3-NEXT: pmovmskb %xmm2, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: ret{{[l|q]}}
 ;
 ; AVX12-LABEL: v8i16:
 ; AVX12: # BB#0:
 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX12-NEXT: vpcmpgtw %xmm3, %xmm2, %xmm1
 ; AVX12-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
 ; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
 ; AVX12-NEXT: ret{{[l|q]}}
@@ -754,42 +743,23 @@
 }
 
 define i8 @v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
-; SSE2-LABEL: v8i8:
-; SSE2: # BB#0:
-; SSE2-NEXT: psllw $8, %xmm3
-; SSE2-NEXT: psraw $8, %xmm3
-; SSE2-NEXT: psllw $8, %xmm2
-; SSE2-NEXT: psraw $8, %xmm2
-; SSE2-NEXT: pcmpgtw %xmm3, %xmm2
-; SSE2-NEXT: psllw $8, %xmm1
-; SSE2-NEXT: psraw $8, %xmm1
-; SSE2-NEXT: psllw $8, %xmm0
-; SSE2-NEXT: psraw $8, %xmm0
-; SSE2-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
-; SSE2-NEXT: packuswb %xmm0, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSSE3-LABEL: v8i8:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: psllw $8, %xmm3
-; SSSE3-NEXT: psraw $8, %xmm3
-; SSSE3-NEXT: psllw $8, %xmm2
-; SSSE3-NEXT: psraw $8, %xmm2
-; SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
-; SSSE3-NEXT: psllw $8, %xmm1
-; SSSE3-NEXT: psraw $8, %xmm1
-; SSSE3-NEXT: psllw $8, %xmm0
-; SSSE3-NEXT: psraw $8, %xmm0
-; SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
-; SSSE3-NEXT: pand %xmm2, %xmm0
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm0, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: ret{{[l|q]}}
+; SSE2-SSSE3-LABEL: v8i8:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: psllw $8, %xmm3
+; SSE2-SSSE3-NEXT: psraw $8, %xmm3
+; SSE2-SSSE3-NEXT: psllw $8, %xmm2
+; SSE2-SSSE3-NEXT: psraw $8, %xmm2
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm3, %xmm2
+; SSE2-SSSE3-NEXT: psllw $8, %xmm1
+; SSE2-SSSE3-NEXT: psraw $8, %xmm1
+; SSE2-SSSE3-NEXT: psllw $8, %xmm0
+; SSE2-SSSE3-NEXT: psraw $8, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm0
+; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: ret{{[l|q]}}
 ;
 ; AVX12-LABEL: v8i8:
 ; AVX12: # BB#0:
@@ -804,7 +774,7 @@
 ; AVX12-NEXT: vpsraw $8, %xmm0, %xmm0
 ; AVX12-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX12-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX12-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX12-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX12-NEXT: vpmovmskb %xmm0, %eax
 ; AVX12-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
 ; AVX12-NEXT: ret{{[l|q]}}
Index: test/CodeGen/X86/bitcast-and-setcc-256.ll
===================================================================
--- test/CodeGen/X86/bitcast-and-setcc-256.ll
+++ test/CodeGen/X86/bitcast-and-setcc-256.ll
@@ -210,34 +210,19 @@
 }
 
 define i8 @v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x i32> %d) {
-; SSE2-LABEL: v8i32:
-; SSE2: # BB#0:
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm0
-; SSE2-NEXT: packssdw %xmm1, %xmm0
-; SSE2-NEXT: pcmpgtd %xmm7, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
-; SSE2-NEXT: packssdw %xmm5, %xmm4
-; SSE2-NEXT: pand %xmm0, %xmm4
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm4
-; SSE2-NEXT: packuswb %xmm4, %xmm4
-; SSE2-NEXT: pmovmskb %xmm4, %eax
-; SSE2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSE2-NEXT: ret{{[l|q]}}
-;
-; SSSE3-LABEL: v8i32:
-; SSSE3: # BB#0:
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
-; SSSE3-NEXT: packssdw %xmm1, %xmm0
-; SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
-; SSSE3-NEXT: packssdw %xmm5, %xmm4
-; SSSE3-NEXT: pand %xmm0, %xmm4
-; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; SSSE3-NEXT: pmovmskb %xmm4, %eax
-; SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
-; SSSE3-NEXT: ret{{[l|q]}}
+; SSE2-SSSE3-LABEL: v8i32:
+; SSE2-SSSE3: # BB#0:
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm3, %xmm1
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm2, %xmm0
+; SSE2-SSSE3-NEXT: packssdw %xmm1, %xmm0
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm7, %xmm5
+; SSE2-SSSE3-NEXT: pcmpgtd %xmm6, %xmm4
+; SSE2-SSSE3-NEXT: packssdw %xmm5, %xmm4
+; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4
+; SSE2-SSSE3-NEXT: packsswb %xmm0, %xmm4
+; SSE2-SSSE3-NEXT: pmovmskb %xmm4, %eax
+; SSE2-SSSE3-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; SSE2-SSSE3-NEXT: ret{{[l|q]}}
 ;
 ; AVX1-LABEL: v8i32:
 ; AVX1: # BB#0:
Index: test/CodeGen/X86/widen_arith-2.ll
===================================================================
--- test/CodeGen/X86/widen_arith-2.ll
+++ test/CodeGen/X86/widen_arith-2.ll
@@ -24,7 +24,7 @@
 ; CHECK-NEXT: pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; CHECK-NEXT: psubw %xmm0, %xmm2
 ; CHECK-NEXT: pand %xmm1, %xmm2
-; CHECK-NEXT: packsswb %xmm0, %xmm2
+; CHECK-NEXT: packuswb %xmm0, %xmm2
 ; CHECK-NEXT: movq %xmm2, (%edx,%eax,8)
 ; CHECK-NEXT: incl (%esp)
 ; CHECK-NEXT: .LBB0_1: # %forcond
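The CHECK blocks above are in the utils/update_llc_test_checks.py autogenerated style, so after rebuilding llc they can be regenerated with that script rather than edited by hand. To rerun just the affected tests against a local build, something like the following works from the LLVM source root; the ./build directory name is an assumption, not part of this patch:

  # Re-run the three updated test files through lit (adjust the build path as needed).
  ./build/bin/llvm-lit -v \
      test/CodeGen/X86/bitcast-and-setcc-128.ll \
      test/CodeGen/X86/bitcast-and-setcc-256.ll \
      test/CodeGen/X86/widen_arith-2.ll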