Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1974,9 +1974,25 @@ break; } - // We can always fold X == X for integer setcc's. - if (N1 == N2 && OpVT.isInteger()) - return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); + if (OpVT.isInteger()) { + // For EQ and NE, we can always pick a value for the undef to make the + // predicate pass or fail, so we can return undef. + // Matches behavior in llvm::ConstantFoldCompareInstruction. + // icmp eq/ne X, undef -> undef. + if ((N1.isUndef() || N2.isUndef()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) + return getUNDEF(VT); + + // If both operands are undef, we can return undef for int comparison. + // icmp undef, undef -> undef. + if (N1.isUndef() && N2.isUndef()) + return getUNDEF(VT); + + // icmp X, X -> true/false + // icmp X, undef -> true/false because undef could be X. 
+ if (N1 == N2) + return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); + } if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { const APInt &C2 = N2C->getAPIntValue(); Index: test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll =================================================================== --- test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -172,9 +172,7 @@ define <4 x i32> @test_urem_div_undef(<4 x i32> %X) nounwind readnone { ; CHECK-LABEL: test_urem_div_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -207,9 +205,7 @@ define <4 x i32> @test_urem_both_undef(<4 x i32> %X) nounwind readnone { ; CHECK-LABEL: test_urem_both_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, Index: test/CodeGen/SPARC/missinglabel.ll =================================================================== --- test/CodeGen/SPARC/missinglabel.ll +++ test/CodeGen/SPARC/missinglabel.ll @@ -4,7 +4,7 @@ target datalayout = "E-m:e-i64:64-n32:64-S128" target triple = "sparc64-unknown-linux-gnu" -define void @f() align 2 { +define void @f(i64 %a0) align 2 { ; CHECK-LABEL: f: ; CHECK: .cfi_startproc ; CHECK-NEXT: ! %bb.0: ! %entry @@ -22,7 +22,7 @@ ; CHECK-NEXT: .LBB0_1: ! %cond.false ; CHECK-NEXT: .LBB0_4: ! 
%exit.i85 entry: - %cmp = icmp eq i64 undef, 0 + %cmp = icmp eq i64 %a0, 0 br i1 %cmp, label %targetblock, label %cond.false cond.false: Index: test/CodeGen/X86/2006-11-17-IllegalMove.ll =================================================================== --- test/CodeGen/X86/2006-11-17-IllegalMove.ll +++ test/CodeGen/X86/2006-11-17-IllegalMove.ll @@ -10,11 +10,9 @@ ; CHECK-NEXT: ja .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb77 ; CHECK-NEXT: movb 0, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: # kill: def $eax killed $eax def $ax -; CHECK-NEXT: divb 0 -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: cmpq %rax, %rax +; CHECK-NEXT: movb 0, %al +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: .LBB0_2: # %bb84 ; CHECK-NEXT: retq entry: Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -1844,70 +1844,70 @@ ; KNL-NEXT: andq $-128, %rsp ; KNL-NEXT: subq $256, %rsp ## imm = 0x100 ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, 248(%rbp), 
%xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm0, %xmm0 ; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, 392(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm1, %xmm1 +; 
KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 232(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $2, 240(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $3, 248(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $4, 256(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $5, 264(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $6, 272(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $7, 280(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $8, 288(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $9, 296(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $10, 304(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $11, 312(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $12, 320(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $13, 328(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $14, 336(%rbp), %xmm1, %xmm1 -; KNL-NEXT: vpinsrb $15, 344(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $1, 488(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, 496(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, 504(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, 512(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, 520(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, 528(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, 536(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, 544(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, 552(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, 560(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, 568(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, 576(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, 584(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, 592(%rbp), %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $15, 600(%rbp), %xmm1, %xmm1 ; KNL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; KNL-NEXT: vpinsrb $1, 360(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $2, 368(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $3, 376(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $4, 384(%rbp), %xmm2, %xmm2 -; KNL-NEXT: 
vpinsrb $5, 392(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $6, 400(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $7, 408(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $8, 416(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $9, 424(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $10, 432(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $11, 440(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $12, 448(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $13, 456(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $14, 464(%rbp), %xmm2, %xmm2 -; KNL-NEXT: vpinsrb $15, 472(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $1, 616(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $2, 624(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, 632(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, 640(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, 648(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, 656(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, 664(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, 672(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, 680(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, 688(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, 696(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, 704(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, 712(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, 720(%rbp), %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $15, 728(%rbp), %xmm2, %xmm2 ; KNL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; KNL-NEXT: vmovd %edi, %xmm2 ; KNL-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2 @@ -1952,7 +1952,6 @@ ; KNL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0 ; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; KNL-NEXT: cmpb $0, 736(%rbp) -; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm0, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovdqa %ymm2, (%rsp) Index: test/CodeGen/X86/select.ll =================================================================== --- test/CodeGen/X86/select.ll +++ test/CodeGen/X86/select.ll @@ -1252,106 +1252,6 @@ ret void } -define void @test19() { -; This is a massive reduction of an llvm-stress test case that 
generates -; interesting chains feeding setcc and eventually a f32 select operation. This -; is intended to exercise the SELECT formation in the DAG combine simplifying -; a simplified select_cc node. If it it regresses and is no longer triggering -; that code path, it can be deleted. -; -; CHECK-LABEL: test19: -; CHECK: ## %bb.0: ## %BB -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: movb $1, %cl -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB23_1: ## %CF -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB23_1 -; CHECK-NEXT: ## %bb.2: ## %CF250 -; CHECK-NEXT: ## in Loop: Header=BB23_1 Depth=1 -; CHECK-NEXT: jne LBB23_1 -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB23_3: ## %CF242 -; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmpl %eax, %eax -; CHECK-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-NEXT: jp LBB23_3 -; CHECK-NEXT: ## %bb.4: ## %CF244 -; CHECK-NEXT: retq -; -; ATHLON-LABEL: test19: -; ATHLON: ## %bb.0: ## %BB -; ATHLON-NEXT: movb $1, %al -; ATHLON-NEXT: .p2align 4, 0x90 -; ATHLON-NEXT: LBB23_1: ## %CF -; ATHLON-NEXT: ## =>This Inner Loop Header: Depth=1 -; ATHLON-NEXT: testb %al, %al -; ATHLON-NEXT: jne LBB23_1 -; ATHLON-NEXT: ## %bb.2: ## %CF250 -; ATHLON-NEXT: ## in Loop: Header=BB23_1 Depth=1 -; ATHLON-NEXT: jne LBB23_1 -; ATHLON-NEXT: ## %bb.3: ## %CF242.preheader -; ATHLON-NEXT: fldz -; ATHLON-NEXT: .p2align 4, 0x90 -; ATHLON-NEXT: LBB23_4: ## %CF242 -; ATHLON-NEXT: ## =>This Inner Loop Header: Depth=1 -; ATHLON-NEXT: fucomi %st(0), %st -; ATHLON-NEXT: jp LBB23_4 -; ATHLON-NEXT: ## %bb.5: ## %CF244 -; ATHLON-NEXT: fstp %st(0) -; ATHLON-NEXT: retl -; -; MCU-LABEL: test19: -; MCU: # %bb.0: # %BB -; MCU-NEXT: movl $-1, %ecx -; MCU-NEXT: movb $1, %al -; MCU-NEXT: .p2align 4, 0x90 -; MCU-NEXT: .LBB23_1: # %CF -; MCU-NEXT: # =>This Inner Loop Header: Depth=1 -; MCU-NEXT: testb %al, %al -; MCU-NEXT: jne .LBB23_1 -; MCU-NEXT: # %bb.2: # %CF250 -; MCU-NEXT: # in Loop: Header=BB23_1 Depth=1 
-; MCU-NEXT: jne .LBB23_1 -; MCU-NEXT: # %bb.3: # %CF242.preheader -; MCU-NEXT: fldz -; MCU-NEXT: .p2align 4, 0x90 -; MCU-NEXT: .LBB23_4: # %CF242 -; MCU-NEXT: # =>This Inner Loop Header: Depth=1 -; MCU-NEXT: cmpl %eax, %ecx -; MCU-NEXT: fucom %st(0) -; MCU-NEXT: fnstsw %ax -; MCU-NEXT: # kill: def $ah killed $ah killed $ax -; MCU-NEXT: sahf -; MCU-NEXT: jp .LBB23_4 -; MCU-NEXT: # %bb.5: # %CF244 -; MCU-NEXT: fstp %st(0) -; MCU-NEXT: retl -BB: - br label %CF - -CF: - %Cmp10 = icmp ule i8 undef, undef - br i1 %Cmp10, label %CF, label %CF250 - -CF250: - %E12 = extractelement <4 x i32> , i32 2 - %Cmp32 = icmp ugt i1 %Cmp10, false - br i1 %Cmp32, label %CF, label %CF242 - -CF242: - %Cmp38 = icmp uge i32 %E12, undef - %FC = uitofp i1 %Cmp38 to float - %Sl59 = select i1 %Cmp32, float %FC, float undef - %Cmp60 = fcmp ugt float undef, undef - br i1 %Cmp60, label %CF242, label %CF244 - -CF244: - %B122 = fadd float %Sl59, undef - ret void -} - define i16 @select_xor_1(i16 %A, i8 %cond) { ; CHECK-LABEL: select_xor_1: ; CHECK: ## %bb.0: ## %entry @@ -1413,10 +1313,10 @@ ; MCU-LABEL: select_xor_1b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %dl -; MCU-NEXT: je .LBB25_2 +; MCU-NEXT: je .LBB24_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: xorl $43, %eax -; MCU-NEXT: .LBB25_2: # %entry +; MCU-NEXT: .LBB24_2: # %entry ; MCU-NEXT: # kill: def $ax killed $ax killed $eax ; MCU-NEXT: retl entry: @@ -1483,10 +1383,10 @@ ; MCU-LABEL: select_xor_2b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: je .LBB27_2 +; MCU-NEXT: je .LBB26_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: xorl %edx, %eax -; MCU-NEXT: .LBB27_2: # %entry +; MCU-NEXT: .LBB26_2: # %entry ; MCU-NEXT: retl entry: %and = and i8 %cond, 1 @@ -1552,10 +1452,10 @@ ; MCU-LABEL: select_or_b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: je .LBB29_2 +; MCU-NEXT: je .LBB28_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: .LBB29_2: # %entry +; MCU-NEXT: .LBB28_2: # %entry ; MCU-NEXT: retl entry: %and = 
and i8 %cond, 1 @@ -1621,10 +1521,10 @@ ; MCU-LABEL: select_or_1b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: je .LBB31_2 +; MCU-NEXT: je .LBB30_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: .LBB31_2: # %entry +; MCU-NEXT: .LBB30_2: # %entry ; MCU-NEXT: retl entry: %and = and i32 %cond, 1 Index: test/CodeGen/X86/undef-ops.ll =================================================================== --- test/CodeGen/X86/undef-ops.ll +++ test/CodeGen/X86/undef-ops.ll @@ -450,8 +450,6 @@ define i1 @undef_operand_size_not_same_as_result() { ; CHECK-LABEL: undef_operand_size_not_same_as_result: ; CHECK: # %bb.0: -; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: sete %al ; CHECK-NEXT: retq %sh = shl i32 7, undef %cmp = icmp eq i32 0, %sh Index: test/CodeGen/X86/urem-seteq-vec-nonsplat.ll =================================================================== --- test/CodeGen/X86/urem-seteq-vec-nonsplat.ll +++ test/CodeGen/X86/urem-seteq-vec-nonsplat.ll @@ -683,16 +683,12 @@ define <4 x i32> @test_urem_div_undef(<4 x i32> %X) nounwind readnone { ; CHECK-SSE-LABEL: test_urem_div_undef: ; CHECK-SSE: # %bb.0: -; CHECK-SSE-NEXT: pxor %xmm0, %xmm0 -; CHECK-SSE-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-SSE-NEXT: psrld $31, %xmm0 +; CHECK-SSE-NEXT: xorps %xmm0, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: test_urem_div_undef: ; CHECK-AVX: # %bb.0: -; CHECK-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 +; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-AVX-NEXT: retq %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -794,16 +790,12 @@ define <4 x i32> @test_urem_both_undef(<4 x i32> %X) nounwind readnone { ; CHECK-SSE-LABEL: test_urem_both_undef: ; CHECK-SSE: # %bb.0: -; CHECK-SSE-NEXT: pxor %xmm0, %xmm0 -; CHECK-SSE-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-SSE-NEXT: psrld $31, %xmm0 +; CHECK-SSE-NEXT: xorps %xmm0, %xmm0 ; CHECK-SSE-NEXT: retq ; ; CHECK-AVX-LABEL: 
test_urem_both_undef: ; CHECK-AVX: # %bb.0: -; CHECK-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; CHECK-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 +; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; CHECK-AVX-NEXT: retq %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, Index: test/CodeGen/X86/vector-shift-ashr-sub128-widen.ll =================================================================== --- test/CodeGen/X86/vector-shift-ashr-sub128-widen.ll +++ test/CodeGen/X86/vector-shift-ashr-sub128-widen.ll @@ -1806,44 +1806,20 @@ ; ; SSE41-LABEL: constant_shift_v4i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,1,2,3,u,u,u,u> -; SSE41-NEXT: pcmpeqw %xmm2, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm3 = -; SSE41-NEXT: pmulhw %xmm1, %xmm3 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3 -; SSE41-NEXT: pcmpeqw {{.*}}(%rip), %xmm2 -; SSE41-NEXT: psraw $1, %xmm1 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3 -; SSE41-NEXT: movdqa %xmm3, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = +; SSE41-NEXT: pmulhw %xmm0, %xmm1 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] +; SSE41-NEXT: psraw $1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7] ; SSE41-NEXT: retq ; -; AVX1-LABEL: constant_shift_v4i16: -; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [4.1720559249406128E-309,4.1720559249406128E-309] -; AVX1-NEXT: # xmm2 = mem[0,0] -; AVX1-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1 -; AVX1-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm3 -; AVX1-NEXT: vpblendvb %xmm1, %xmm0, %xmm3, %xmm1 -; AVX1-NEXT: vpcmpeqw {{.*}}(%rip), %xmm2, %xmm2 -; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0 -; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: constant_shift_v4i16: -; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpbroadcastq 
{{.*#+}} xmm2 = [844433520132096,844433520132096] -; AVX2-NEXT: vpcmpeqw %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm3 -; AVX2-NEXT: vpblendvb %xmm1, %xmm0, %xmm3, %xmm1 -; AVX2-NEXT: vpcmpeqw {{.*}}(%rip), %xmm2, %xmm2 -; AVX2-NEXT: vpsraw $1, %xmm0, %xmm0 -; AVX2-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: constant_shift_v4i16: +; AVX: # %bb.0: +; AVX-NEXT: vpmulhw {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] +; AVX-NEXT: vpsraw $1, %xmm0, %xmm0 +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7] +; AVX-NEXT: retq ; ; XOP-LABEL: constant_shift_v4i16: ; XOP: # %bb.0: Index: test/CodeGen/X86/vector-shift-lshr-sub128-widen.ll =================================================================== --- test/CodeGen/X86/vector-shift-lshr-sub128-widen.ll +++ test/CodeGen/X86/vector-shift-lshr-sub128-widen.ll @@ -1500,32 +1500,25 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; SSE2-LABEL: constant_shift_v4i16: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = -; SSE2-NEXT: pmulhuw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pcmpeqw {{.*}}(%rip), %xmm2 -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: pandn %xmm1, %xmm2 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm0, %xmm2 +; SSE2-NEXT: pmulhuw {{.*}}(%rip), %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: constant_shift_v4i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm0 -; SSE41-NEXT: pcmpeqw {{.*}}(%rip), %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = -; SSE41-NEXT: pmulhuw %xmm1, %xmm2 -; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 +; SSE41-NEXT: movdqa {{.*#+}} xmm1 = +; SSE41-NEXT: pmulhuw %xmm0, %xmm1 +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = 
xmm0[0],xmm1[1,2,3,4,5,6,7] ; SSE41-NEXT: retq ; ; AVX-LABEL: constant_shift_v4i16: ; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX-NEXT: vpcmpeqw {{.*}}(%rip), %xmm1, %xmm1 -; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm2 -; AVX-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 +; AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX-NEXT: retq ; ; XOP-LABEL: constant_shift_v4i16: @@ -1535,10 +1528,8 @@ ; ; AVX512DQ-LABEL: constant_shift_v4i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpeqw {{.*}}(%rip), %xmm1, %xmm1 -; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm2 -; AVX512DQ-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 +; AVX512DQ-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: constant_shift_v4i16: @@ -1552,10 +1543,8 @@ ; ; AVX512DQVL-LABEL: constant_shift_v4i16: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQVL-NEXT: vpcmpeqw {{.*}}(%rip), %xmm1, %xmm1 -; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm2 -; AVX512DQVL-NEXT: vpblendvb %xmm1, %xmm0, %xmm2, %xmm0 +; AVX512DQVL-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1 +; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: constant_shift_v4i16: @@ -1565,12 +1554,11 @@ ; ; X32-SSE-LABEL: constant_shift_v4i16: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = -; X32-SSE-NEXT: pmulhuw %xmm0, %xmm1 -; X32-SSE-NEXT: pxor %xmm2, %xmm2 -; X32-SSE-NEXT: pcmpeqw {{\.LCPI.*}}, %xmm2 -; X32-SSE-NEXT: pand %xmm2, %xmm0 -; X32-SSE-NEXT: pandn %xmm1, %xmm2 +; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535] +; X32-SSE-NEXT: movdqa %xmm1, %xmm2 +; X32-SSE-NEXT: pandn %xmm0, %xmm2 +; X32-SSE-NEXT: pmulhuw {{\.LCPI.*}}, %xmm0 +; X32-SSE-NEXT: pand %xmm1, %xmm0 ; 
X32-SSE-NEXT: por %xmm2, %xmm0 ; X32-SSE-NEXT: retl %shift = lshr <4 x i16> %a,