diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3431,7 +3431,6 @@ } Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (Known.isUnknown()) break; // Early-out Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); if (IsMax) Known = KnownBits::smax(Known, Known2); diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll --- a/llvm/test/CodeGen/X86/avx512-trunc.ll +++ b/llvm/test/CodeGen/X86/avx512-trunc.ll @@ -1007,21 +1007,14 @@ define void @negative_test1_smax_usat_trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) { ; KNL-LABEL: negative_test1_smax_usat_trunc_wb_256_mem: ; KNL: ## %bb.0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; KNL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; KNL-NEXT: vpbroadcastd {{.*#+}} zmm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; KNL-NEXT: vpmovdb %zmm0, (%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; ; SKX-LABEL: negative_test1_smax_usat_trunc_wb_256_mem: ; SKX: ## %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 -; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 -; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 +; SKX-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 ; SKX-NEXT: vpmovwb %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/known-bits-vector.ll b/llvm/test/CodeGen/X86/known-bits-vector.ll --- a/llvm/test/CodeGen/X86/known-bits-vector.ll +++ b/llvm/test/CodeGen/X86/known-bits-vector.ll @@ -435,11 +435,7 @@ ; X32-NEXT: vpminsd {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpmaxsd {{\.LCPI.*}}, %xmm0, %xmm0 ; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vpsrld $16, %xmm0, %xmm0 -; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X32-NEXT: vsubps {{\.LCPI.*}}, %xmm0, %xmm0 -; X32-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X32-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: knownbits_smax_smin_shuffle_uitofp: @@ -447,11 +443,7 @@ ; X64-NEXT: vpminsd {{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpmaxsd {{.*}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,3,3] -; X64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vpsrld $16, %xmm0, %xmm0 -; X64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] -; X64-NEXT: vsubps {{.*}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; X64-NEXT: vcvtdq2ps %xmm0, %xmm0 ; X64-NEXT: retq %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> ) %2 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> ) diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll --- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll @@ -5190,12 +5190,13 @@ ; SSE2-NEXT: pxor %xmm7, %xmm7 ; SSE2-NEXT: 
movdqa {{.*#+}} xmm6 = [32768,32768,32768,32768,32768,32768,32768,32768] ; SSE2-NEXT: pxor %xmm6, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [33023,33023,33023,33023,33023,33023,33023,33023] -; SSE2-NEXT: pminsw %xmm8, %xmm1 -; SSE2-NEXT: pxor %xmm6, %xmm1 -; SSE2-NEXT: pxor %xmm6, %xmm0 -; SSE2-NEXT: pminsw %xmm8, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [33023,33023,33023,33023,33023,33023,33023,33023] +; SSE2-NEXT: pminsw %xmm9, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSE2-NEXT: pand %xmm8, %xmm1 ; SSE2-NEXT: pxor %xmm6, %xmm0 +; SSE2-NEXT: pminsw %xmm9, %xmm0 +; SSE2-NEXT: pand %xmm8, %xmm0 ; SSE2-NEXT: packuswb %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm7, %xmm4 ; SSE2-NEXT: pmovmskb %xmm4, %ecx @@ -5273,15 +5274,15 @@ ; SSE2-NEXT: # %bb.25: # %cond.store23 ; SSE2-NEXT: movb %cl, 12(%rdi) ; SSE2-NEXT: .LBB15_26: # %else24 -; SSE2-NEXT: pminsw %xmm8, %xmm3 -; SSE2-NEXT: pminsw %xmm8, %xmm2 +; SSE2-NEXT: pminsw %xmm9, %xmm3 +; SSE2-NEXT: pminsw %xmm9, %xmm2 ; SSE2-NEXT: testl $8192, %eax # imm = 0x2000 ; SSE2-NEXT: je .LBB15_28 ; SSE2-NEXT: # %bb.27: # %cond.store25 ; SSE2-NEXT: movb %ch, 13(%rdi) ; SSE2-NEXT: .LBB15_28: # %else26 -; SSE2-NEXT: pxor %xmm6, %xmm3 -; SSE2-NEXT: pxor %xmm6, %xmm2 +; SSE2-NEXT: pand %xmm8, %xmm3 +; SSE2-NEXT: pand %xmm8, %xmm2 ; SSE2-NEXT: testl $16384, %eax # imm = 0x4000 ; SSE2-NEXT: pextrw $7, %xmm0, %ecx ; SSE2-NEXT: je .LBB15_30 @@ -6412,10 +6413,11 @@ ; SSE2-NEXT: pxor %xmm4, %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [33023,33023,33023,33023,33023,33023,33023,33023] ; SSE2-NEXT: pminsw %xmm5, %xmm1 -; SSE2-NEXT: pxor %xmm4, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSE2-NEXT: pand %xmm6, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm0 ; SSE2-NEXT: pminsw %xmm5, %xmm0 -; SSE2-NEXT: pxor %xmm4, %xmm0 +; SSE2-NEXT: pand %xmm6, %xmm0 ; SSE2-NEXT: packuswb %xmm1, %xmm0 ; SSE2-NEXT: pcmpeqb %xmm2, %xmm3 ; SSE2-NEXT: pmovmskb %xmm3, %eax @@ -7049,10 +7051,9 @@ ; SSE2-LABEL: truncstore_v8i16_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE2-NEXT: pminsw {{.*}}(%rip), %xmm0 -; SSE2-NEXT: pxor %xmm3, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm0 ; SSE2-NEXT: pcmpeqw %xmm1, %xmm2 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -4261,19 +4261,17 @@ define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) { ; SSE2-LABEL: trunc_usat_v8i16_v8i8: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE2-NEXT: pminsw {{.*}}(%rip), %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: trunc_usat_v8i16_v8i8: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: pminsw {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: packuswb %xmm0, %xmm0 ; SSSE3-NEXT: retq ; @@ -4327,20 +4325,18 @@ define 
void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, <8 x i8> *%p1) { ; SSE2-LABEL: trunc_usat_v8i16_v8i8_store: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE2-NEXT: pminsw {{.*}}(%rip), %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 ; SSE2-NEXT: packuswb %xmm0, %xmm0 ; SSE2-NEXT: movq %xmm0, (%rdi) ; SSE2-NEXT: retq ; ; SSSE3-LABEL: trunc_usat_v8i16_v8i8_store: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: pminsw {{.*}}(%rip), %xmm0 -; SSSE3-NEXT: pxor %xmm1, %xmm0 +; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0 ; SSSE3-NEXT: packuswb %xmm0, %xmm0 ; SSSE3-NEXT: movq %xmm0, (%rdi) ; SSSE3-NEXT: retq @@ -4404,10 +4400,11 @@ ; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [33023,33023,33023,33023,33023,33023,33023,33023] ; SSE2-NEXT: pminsw %xmm3, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSE2-NEXT: pand %xmm4, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: pminsw %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm4, %xmm0 ; SSE2-NEXT: packuswb %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -4417,10 +4414,11 @@ ; SSSE3-NEXT: pxor %xmm2, %xmm1 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [33023,33023,33023,33023,33023,33023,33023,33023] ; SSSE3-NEXT: pminsw %xmm3, %xmm1 -; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSSE3-NEXT: pand %xmm4, %xmm1 ; SSSE3-NEXT: pxor %xmm2, %xmm0 ; SSSE3-NEXT: pminsw %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: pand %xmm4, %xmm0 ; SSSE3-NEXT: packuswb %xmm1, %xmm0 ; SSSE3-NEXT: retq ; @@ -4494,50 +4492,50 @@ define <32 x i8> @trunc_usat_v32i16_v32i8(<32 x i16>* %p0) { ; SSE2-LABEL: trunc_usat_v32i16_v32i8: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE2-NEXT: movdqa 48(%rdi), %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768] +; SSE2-NEXT: movdqa 48(%rdi), %xmm2 +; SSE2-NEXT: pxor %xmm0, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [33023,33023,33023,33023,33023,33023,33023,33023] -; SSE2-NEXT: pminsw %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pminsw %xmm3, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSE2-NEXT: pand %xmm4, %xmm2 ; SSE2-NEXT: movdqa 32(%rdi), %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: pminsw %xmm3, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 -; SSE2-NEXT: packuswb %xmm0, %xmm1 -; SSE2-NEXT: movdqa 16(%rdi), %xmm4 -; SSE2-NEXT: pxor %xmm2, %xmm4 -; SSE2-NEXT: pminsw %xmm3, %xmm4 -; SSE2-NEXT: pxor %xmm2, %xmm4 -; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm4, %xmm1 +; SSE2-NEXT: packuswb %xmm2, %xmm1 +; SSE2-NEXT: movdqa 16(%rdi), %xmm2 +; SSE2-NEXT: pxor %xmm0, %xmm2 +; SSE2-NEXT: pminsw %xmm3, %xmm2 +; SSE2-NEXT: pand %xmm4, %xmm2 +; SSE2-NEXT: pxor (%rdi), %xmm0 ; SSE2-NEXT: pminsw %xmm3, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: packuswb %xmm4, %xmm0 +; SSE2-NEXT: pand %xmm4, %xmm0 +; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSSE3-LABEL: 
trunc_usat_v32i16_v32i8: ; SSSE3: # %bb.0: -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSSE3-NEXT: movdqa 48(%rdi), %xmm0 -; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768] +; SSSE3-NEXT: movdqa 48(%rdi), %xmm2 +; SSSE3-NEXT: pxor %xmm0, %xmm2 ; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [33023,33023,33023,33023,33023,33023,33023,33023] -; SSSE3-NEXT: pminsw %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: pminsw %xmm3, %xmm2 +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [32767,32767,32767,32767,32767,32767,32767,32767] +; SSSE3-NEXT: pand %xmm4, %xmm2 ; SSSE3-NEXT: movdqa 32(%rdi), %xmm1 -; SSSE3-NEXT: pxor %xmm2, %xmm1 +; SSSE3-NEXT: pxor %xmm0, %xmm1 ; SSSE3-NEXT: pminsw %xmm3, %xmm1 -; SSSE3-NEXT: pxor %xmm2, %xmm1 -; SSSE3-NEXT: packuswb %xmm0, %xmm1 -; SSSE3-NEXT: movdqa 16(%rdi), %xmm4 -; SSSE3-NEXT: pxor %xmm2, %xmm4 -; SSSE3-NEXT: pminsw %xmm3, %xmm4 -; SSSE3-NEXT: pxor %xmm2, %xmm4 -; SSSE3-NEXT: movdqa (%rdi), %xmm0 -; SSSE3-NEXT: pxor %xmm2, %xmm0 +; SSSE3-NEXT: pand %xmm4, %xmm1 +; SSSE3-NEXT: packuswb %xmm2, %xmm1 +; SSSE3-NEXT: movdqa 16(%rdi), %xmm2 +; SSSE3-NEXT: pxor %xmm0, %xmm2 +; SSSE3-NEXT: pminsw %xmm3, %xmm2 +; SSSE3-NEXT: pand %xmm4, %xmm2 +; SSSE3-NEXT: pxor (%rdi), %xmm0 ; SSSE3-NEXT: pminsw %xmm3, %xmm0 -; SSSE3-NEXT: pxor %xmm2, %xmm0 -; SSSE3-NEXT: packuswb %xmm4, %xmm0 +; SSSE3-NEXT: pand %xmm4, %xmm0 +; SSSE3-NEXT: packuswb %xmm2, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: trunc_usat_v32i16_v32i8:
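
The functional change is the first hunk: computeKnownBits no longer gives up on ISD::SMAX/ISD::SMIN when the first operand's bits are completely unknown. KnownBits::smax/KnownBits::smin (visible in the retained lines) can still derive the sign bit from the other operand: smax(x, y) >= y, so a known non-negative y forces a known-zero sign bit even when nothing is known about x. That recovered sign bit is what lets the usat-truncation tests fold to constants and the uitofp test collapse to a single vcvtdq2ps above. Below is a minimal standalone sketch of that sign-bit reasoning, reduced to 8-bit masks; the Known8 type and smaxSignBit helper are illustrative stand-ins, not LLVM's actual KnownBits API.

#include <cstdint>
#include <cstdio>

// Illustrative known-bits pair for an 8-bit value: each bit is known-zero,
// known-one, or unknown (clear in both masks).
struct Known8 {
  uint8_t Zero; // bits known to be 0
  uint8_t One;  // bits known to be 1
  bool isUnknown() const { return Zero == 0 && One == 0; }
};

// Sign-bit reasoning for smax. smax(x, y) is >= both operands, so if either
// operand is known non-negative the result is known non-negative; if both
// are known negative the result is known negative.
Known8 smaxSignBit(Known8 LHS, Known8 RHS) {
  const uint8_t SignBit = 0x80;
  Known8 Res = {0, 0};
  if ((LHS.Zero & SignBit) || (RHS.Zero & SignBit))
    Res.Zero |= SignBit;
  else if ((LHS.One & SignBit) && (RHS.One & SignBit))
    Res.One |= SignBit;
  return Res;
}

int main() {
  Known8 Unknown = {0, 0};    // like the %i argument in the tests above
  Known8 NonNeg = {0x80, 0};  // like the zero vector: sign bit known 0
  Known8 R = smaxSignBit(Unknown, NonNeg);
  // The removed early-out returned before this point whenever
  // LHS.isUnknown() was true, discarding a fact the RHS alone provides.
  printf("sign bit known zero: %d\n", (R.Zero & 0x80) ? 1 : 0);
  return 0;
}

Under these assumptions the program prints "sign bit known zero: 1". In the negative_test1 case above the same reasoning chains through the DAG: smax(%i, 0) is known non-negative, smin of a non-negative value with all-ones (-1) is known all-ones, and the unsigned-saturating truncation of all-ones is exactly the broadcast of 255 that the updated CHECK lines expect.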