Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3138,11 +3138,12 @@ // X / 1 -> X // X % 1 -> 0 - if (N1C && N1C->isOne()) - return IsDiv ? N0 : DAG.getConstant(0, DL, VT); // If this is a boolean op (single-bit element type), we can't have // division-by-zero or remainder-by-zero, so assume the divisor is 1. - // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1. + // TODO: Similarly, if we're zero-extending a boolean divisor, then assume + // it's a 1. + if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1)) + return IsDiv ? N0 : DAG.getConstant(0, DL, VT); return SDValue(); } Index: llvm/trunk/test/CodeGen/Mips/llvm-ir/sdiv.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/llvm-ir/sdiv.ll +++ llvm/trunk/test/CodeGen/Mips/llvm-ir/sdiv.ll @@ -35,55 +35,32 @@ define signext i1 @sdiv_i1(i1 signext %a, i1 signext %b) { ; GP32-LABEL: sdiv_i1: ; GP32: # %bb.0: # %entry -; GP32-NEXT: div $zero, $4, $5 -; GP32-NEXT: teq $5, $zero, 7 -; GP32-NEXT: mflo $1 -; GP32-NEXT: andi $1, $1, 1 ; GP32-NEXT: jr $ra -; GP32-NEXT: negu $2, $1 +; GP32-NEXT: move $2, $4 ; ; GP32R6-LABEL: sdiv_i1: ; GP32R6: # %bb.0: # %entry -; GP32R6-NEXT: div $1, $4, $5 -; GP32R6-NEXT: teq $5, $zero, 7 -; GP32R6-NEXT: andi $1, $1, 1 ; GP32R6-NEXT: jr $ra -; GP32R6-NEXT: negu $2, $1 +; GP32R6-NEXT: move $2, $4 ; ; GP64-LABEL: sdiv_i1: ; GP64: # %bb.0: # %entry -; GP64-NEXT: div $zero, $4, $5 -; GP64-NEXT: teq $5, $zero, 7 -; GP64-NEXT: mflo $1 -; GP64-NEXT: andi $1, $1, 1 ; GP64-NEXT: jr $ra -; GP64-NEXT: negu $2, $1 +; GP64-NEXT: move $2, $4 ; ; GP64R6-LABEL: sdiv_i1: ; GP64R6: # %bb.0: # %entry -; GP64R6-NEXT: div $1, $4, $5 -; GP64R6-NEXT: teq $5, $zero, 7 -; GP64R6-NEXT: andi $1, $1, 1 ; GP64R6-NEXT: jr $ra -; GP64R6-NEXT: negu $2, $1 +; GP64R6-NEXT: move $2, $4 ; ; MMR3-LABEL: sdiv_i1: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: div $zero, $4, $5 -; MMR3-NEXT: teq $5, $zero, 7 -; MMR3-NEXT: mflo16 $2 -; MMR3-NEXT: andi16 $2, $2, 1 -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: subu16 $2, $3, $2 +; MMR3-NEXT: move $2, $4 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: sdiv_i1: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: div $2, $4, $5 -; MMR6-NEXT: teq $5, $zero, 7 -; MMR6-NEXT: andi16 $2, $2, 1 -; MMR6-NEXT: li16 $3, 0 -; MMR6-NEXT: subu16 $2, $3, $2 +; MMR6-NEXT: move $2, $4 ; MMR6-NEXT: jrc $ra entry: %r = sdiv i1 %a, %b Index: llvm/trunk/test/CodeGen/Mips/llvm-ir/srem.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/llvm-ir/srem.ll +++ llvm/trunk/test/CodeGen/Mips/llvm-ir/srem.ll @@ -35,55 +35,32 @@ define signext i1 @srem_i1(i1 signext %a, i1 signext %b) { ; GP32-LABEL: srem_i1: ; GP32: # %bb.0: # %entry -; GP32-NEXT: div $zero, $4, $5 -; GP32-NEXT: teq $5, $zero, 7 -; GP32-NEXT: mfhi $1 -; GP32-NEXT: andi $1, $1, 1 ; GP32-NEXT: jr $ra -; GP32-NEXT: negu $2, $1 +; GP32-NEXT: addiu $2, $zero, 0 ; ; GP32R6-LABEL: srem_i1: ; GP32R6: # %bb.0: # %entry -; GP32R6-NEXT: mod $1, $4, $5 -; GP32R6-NEXT: teq $5, $zero, 7 -; GP32R6-NEXT: andi $1, $1, 1 ; GP32R6-NEXT: jr $ra -; GP32R6-NEXT: negu $2, $1 +; GP32R6-NEXT: addiu $2, $zero, 0 ; ; GP64-LABEL: srem_i1: ; GP64: # %bb.0: # %entry -; GP64-NEXT: div $zero, $4, $5 -; GP64-NEXT: teq $5, $zero, 7 -; GP64-NEXT: mfhi $1 -; GP64-NEXT: andi $1, $1, 1 ; GP64-NEXT: jr $ra -; GP64-NEXT: negu $2, $1 +; GP64-NEXT: addiu $2, $zero, 0 ; ; GP64R6-LABEL: srem_i1: ; GP64R6: # %bb.0: # %entry -; GP64R6-NEXT: mod $1, $4, $5 -; GP64R6-NEXT: teq $5, $zero, 7 -; GP64R6-NEXT: andi $1, $1, 1 ; GP64R6-NEXT: jr $ra -; GP64R6-NEXT: negu $2, $1 +; GP64R6-NEXT: addiu $2, $zero, 0 ; ; MMR3-LABEL: srem_i1: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: div $zero, $4, $5 -; MMR3-NEXT: teq $5, $zero, 7 -; MMR3-NEXT: mfhi16 $2 -; MMR3-NEXT: andi16 $2, $2, 1 -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: subu16 $2, $3, $2 +; MMR3-NEXT: li16 $2, 0 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: srem_i1: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: mod $2, $4, $5 -; MMR6-NEXT: teq $5, $zero, 7 -; MMR6-NEXT: andi16 $2, $2, 1 -; MMR6-NEXT: li16 $3, 0 -; MMR6-NEXT: subu16 $2, $3, $2 +; MMR6-NEXT: li16 $2, 0 ; MMR6-NEXT: jrc $ra entry: %r = srem i1 %a, %b Index: llvm/trunk/test/CodeGen/Mips/llvm-ir/udiv.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/llvm-ir/udiv.ll +++ llvm/trunk/test/CodeGen/Mips/llvm-ir/udiv.ll @@ -35,41 +35,32 @@ define zeroext i1 @udiv_i1(i1 zeroext %a, i1 zeroext %b) { ; GP32-LABEL: udiv_i1: ; GP32: # %bb.0: # %entry -; GP32-NEXT: divu $zero, $4, $5 -; GP32-NEXT: teq $5, $zero, 7 ; GP32-NEXT: jr $ra -; GP32-NEXT: mflo $2 +; GP32-NEXT: move $2, $4 ; ; GP32R6-LABEL: udiv_i1: ; GP32R6: # %bb.0: # %entry -; GP32R6-NEXT: divu $2, $4, $5 -; GP32R6-NEXT: teq $5, $zero, 7 -; GP32R6-NEXT: jrc $ra +; GP32R6-NEXT: jr $ra +; GP32R6-NEXT: move $2, $4 ; ; GP64-LABEL: udiv_i1: ; GP64: # %bb.0: # %entry -; GP64-NEXT: divu $zero, $4, $5 -; GP64-NEXT: teq $5, $zero, 7 ; GP64-NEXT: jr $ra -; GP64-NEXT: mflo $2 +; GP64-NEXT: move $2, $4 ; ; GP64R6-LABEL: udiv_i1: ; GP64R6: # %bb.0: # %entry -; GP64R6-NEXT: divu $2, $4, $5 -; GP64R6-NEXT: teq $5, $zero, 7 -; GP64R6-NEXT: jrc $ra +; GP64R6-NEXT: jr $ra +; GP64R6-NEXT: move $2, $4 ; ; MMR3-LABEL: udiv_i1: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: divu $zero, $4, $5 -; MMR3-NEXT: teq $5, $zero, 7 -; MMR3-NEXT: mflo16 $2 +; MMR3-NEXT: move $2, $4 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: udiv_i1: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: divu $2, $4, $5 -; MMR6-NEXT: teq $5, $zero, 7 +; MMR6-NEXT: move $2, $4 ; MMR6-NEXT: jrc $ra entry: %r = udiv i1 %a, %b Index: llvm/trunk/test/CodeGen/Mips/llvm-ir/urem.ll =================================================================== --- llvm/trunk/test/CodeGen/Mips/llvm-ir/urem.ll +++ llvm/trunk/test/CodeGen/Mips/llvm-ir/urem.ll @@ -35,64 +35,32 @@ define signext i1 @urem_i1(i1 signext %a, i1 signext %b) { ; GP32-LABEL: urem_i1: ; GP32: # %bb.0: # %entry -; GP32-NEXT: andi $1, $5, 1 -; GP32-NEXT: andi $2, $4, 1 -; GP32-NEXT: divu $zero, $2, $1 -; GP32-NEXT: teq $1, $zero, 7 -; GP32-NEXT: mfhi $1 -; GP32-NEXT: andi $1, $1, 1 ; GP32-NEXT: jr $ra -; GP32-NEXT: negu $2, $1 +; GP32-NEXT: addiu $2, $zero, 0 ; ; GP32R6-LABEL: urem_i1: ; GP32R6: # %bb.0: # %entry -; GP32R6-NEXT: andi $1, $5, 1 -; GP32R6-NEXT: andi $2, $4, 1 -; GP32R6-NEXT: modu $2, $2, $1 -; GP32R6-NEXT: teq $1, $zero, 7 ; GP32R6-NEXT: jr $ra -; GP32R6-NEXT: negu $2, $2 +; GP32R6-NEXT: addiu $2, $zero, 0 ; ; GP64-LABEL: urem_i1: ; GP64: # %bb.0: # %entry -; GP64-NEXT: andi $1, $5, 1 -; GP64-NEXT: andi $2, $4, 1 -; GP64-NEXT: divu $zero, $2, $1 -; GP64-NEXT: teq $1, $zero, 7 -; GP64-NEXT: mfhi $1 -; GP64-NEXT: andi $1, $1, 1 ; GP64-NEXT: jr $ra -; GP64-NEXT: negu $2, $1 +; GP64-NEXT: addiu $2, $zero, 0 ; ; GP64R6-LABEL: urem_i1: ; GP64R6: # %bb.0: # %entry -; GP64R6-NEXT: andi $1, $5, 1 -; GP64R6-NEXT: andi $2, $4, 1 -; GP64R6-NEXT: modu $2, $2, $1 -; GP64R6-NEXT: teq $1, $zero, 7 ; GP64R6-NEXT: jr $ra -; GP64R6-NEXT: negu $2, $2 +; GP64R6-NEXT: addiu $2, $zero, 0 ; ; MMR3-LABEL: urem_i1: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: andi16 $2, $5, 1 -; MMR3-NEXT: andi16 $3, $4, 1 -; MMR3-NEXT: divu $zero, $3, $2 -; MMR3-NEXT: teq $2, $zero, 7 -; MMR3-NEXT: mfhi16 $2 -; MMR3-NEXT: andi16 $2, $2, 1 -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: subu16 $2, $3, $2 +; MMR3-NEXT: li16 $2, 0 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: urem_i1: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: andi16 $2, $5, 1 -; MMR6-NEXT: andi16 $3, $4, 1 -; MMR6-NEXT: modu $3, $3, $2 -; MMR6-NEXT: teq $2, $zero, 7 ; MMR6-NEXT: li16 $2, 0 -; MMR6-NEXT: subu16 $2, $2, $3 ; MMR6-NEXT: jrc $ra entry: %r = urem i1 %a, %b Index: llvm/trunk/test/CodeGen/X86/combine-sdiv.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-sdiv.ll +++ llvm/trunk/test/CodeGen/X86/combine-sdiv.ll @@ -3294,322 +3294,16 @@ ; CHECK-LABEL: bool_sdiv: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: negb %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax -; CHECK-NEXT: cbtw -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: negb %sil -; CHECK-NEXT: idivb %sil ; CHECK-NEXT: retq %r = sdiv i1 %x, %y ret i1 %r } define <4 x i1> @boolvec_sdiv(<4 x i1> %x, <4 x i1> %y) { -; SSE2-LABEL: boolvec_sdiv: -; SSE2: # %bb.0: -; SSE2-NEXT: pslld $31, %xmm1 -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: pslld $31, %xmm0 -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: cltd -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: boolvec_sdiv: -; SSE41: # %bb.0: -; SSE41-NEXT: pslld $31, %xmm1 -; SSE41-NEXT: psrad $31, %xmm1 -; SSE41-NEXT: pslld $31, %xmm0 -; SSE41-NEXT: psrad $31, %xmm0 -; SSE41-NEXT: pextrd $1, %xmm0, %eax -; SSE41-NEXT: pextrd $1, %xmm1, %ecx -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: movd %xmm0, %eax -; SSE41-NEXT: movd %xmm1, %esi -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %esi -; SSE41-NEXT: movd %eax, %xmm2 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE41-NEXT: pextrd $2, %xmm0, %eax -; SSE41-NEXT: pextrd $2, %xmm1, %ecx -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: pinsrd $2, %eax, %xmm2 -; SSE41-NEXT: pextrd $3, %xmm0, %eax -; SSE41-NEXT: pextrd $3, %xmm1, %ecx -; SSE41-NEXT: cltd -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: pinsrd $3, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: boolvec_sdiv: -; AVX1: # %bb.0: -; AVX1-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 -; AVX1-NEXT: vpextrd $1, %xmm0, %eax -; AVX1-NEXT: vpextrd $1, %xmm1, %ecx -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %ecx -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: vmovd %xmm1, %esi -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %esi -; AVX1-NEXT: vmovd %eax, %xmm2 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $2, %xmm0, %eax -; AVX1-NEXT: vpextrd $2, %xmm1, %ecx -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %ecx -; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vpextrd $3, %xmm1, %ecx -; AVX1-NEXT: cltd -; AVX1-NEXT: idivl %ecx -; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: boolvec_sdiv: -; AVX2: # %bb.0: -; AVX2-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 -; AVX2-NEXT: vpextrd $1, %xmm0, %eax -; AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %ecx -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: vmovd %xmm1, %esi -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %esi -; AVX2-NEXT: vmovd %eax, %xmm2 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $2, %xmm0, %eax -; AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %ecx -; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; AVX2-NEXT: cltd -; AVX2-NEXT: idivl %ecx -; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX2-NEXT: retq -; -; AVX512F-LABEL: boolvec_sdiv: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3 -; AVX512F-NEXT: kshiftrw $3, %k3, %k0 -; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4 -; AVX512F-NEXT: kshiftrw $3, %k4, %k1 -; AVX512F-NEXT: kshiftrw $2, %k3, %k2 -; AVX512F-NEXT: kshiftrw $2, %k4, %k5 -; AVX512F-NEXT: kmovw %k5, %ecx -; AVX512F-NEXT: kshiftrw $1, %k3, %k5 -; AVX512F-NEXT: kmovw %k3, %edi -; AVX512F-NEXT: kshiftrw $1, %k4, %k3 -; AVX512F-NEXT: kmovw %k4, %esi -; AVX512F-NEXT: kmovw %k5, %edx -; AVX512F-NEXT: kmovw %k3, %eax -; AVX512F-NEXT: andb $1, %al -; AVX512F-NEXT: negb %al -; AVX512F-NEXT: # kill: def $al killed $al killed $eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: andb $1, %dl -; AVX512F-NEXT: negb %dl -; AVX512F-NEXT: idivb %dl -; AVX512F-NEXT: movl %eax, %edx -; AVX512F-NEXT: andb $1, %sil -; AVX512F-NEXT: negb %sil -; AVX512F-NEXT: movl %esi, %eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: andb $1, %dil -; AVX512F-NEXT: negb %dil -; AVX512F-NEXT: idivb %dil -; AVX512F-NEXT: movl %eax, %esi -; AVX512F-NEXT: andb $1, %cl -; AVX512F-NEXT: negb %cl -; AVX512F-NEXT: movl %ecx, %eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: kmovw %k2, %ecx -; AVX512F-NEXT: andb $1, %cl -; AVX512F-NEXT: negb %cl -; AVX512F-NEXT: idivb %cl -; AVX512F-NEXT: movl %eax, %ecx -; AVX512F-NEXT: kmovw %k1, %eax -; AVX512F-NEXT: andb $1, %al -; AVX512F-NEXT: negb %al -; AVX512F-NEXT: # kill: def $al killed $al killed $eax -; AVX512F-NEXT: cbtw -; AVX512F-NEXT: kmovw %k0, %edi -; AVX512F-NEXT: andb $1, %dil -; AVX512F-NEXT: negb %dil -; AVX512F-NEXT: idivb %dil -; AVX512F-NEXT: # kill: def $al killed $al def $eax -; AVX512F-NEXT: kmovw %edx, %k0 -; AVX512F-NEXT: kmovw %esi, %k1 -; AVX512F-NEXT: kshiftrw $1, %k1, %k2 -; AVX512F-NEXT: kxorw %k0, %k2, %k0 -; AVX512F-NEXT: kshiftlw $15, %k0, %k0 -; AVX512F-NEXT: kshiftrw $14, %k0, %k0 -; AVX512F-NEXT: kxorw %k0, %k1, %k0 -; AVX512F-NEXT: kshiftrw $2, %k0, %k1 -; AVX512F-NEXT: kmovw %ecx, %k2 -; AVX512F-NEXT: kxorw %k2, %k1, %k1 -; AVX512F-NEXT: kshiftlw $15, %k1, %k1 -; AVX512F-NEXT: kshiftrw $13, %k1, %k1 -; AVX512F-NEXT: kxorw %k1, %k0, %k0 -; AVX512F-NEXT: kshiftlw $13, %k0, %k0 -; AVX512F-NEXT: kshiftrw $13, %k0, %k0 -; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: kshiftlw $3, %k1, %k1 -; AVX512F-NEXT: korw %k1, %k0, %k1 -; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} -; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512BW-LABEL: boolvec_sdiv: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX512BW-NEXT: vptestmd %xmm1, %xmm1, %k3 -; AVX512BW-NEXT: kshiftrw $3, %k3, %k0 -; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k4 -; AVX512BW-NEXT: kshiftrw $3, %k4, %k1 -; AVX512BW-NEXT: kshiftrw $2, %k3, %k2 -; AVX512BW-NEXT: kshiftrw $2, %k4, %k5 -; AVX512BW-NEXT: kmovd %k5, %ecx -; AVX512BW-NEXT: kshiftrw $1, %k3, %k5 -; AVX512BW-NEXT: kmovd %k3, %edi -; AVX512BW-NEXT: kshiftrw $1, %k4, %k3 -; AVX512BW-NEXT: kmovd %k4, %esi -; AVX512BW-NEXT: kmovd %k5, %edx -; AVX512BW-NEXT: kmovd %k3, %eax -; AVX512BW-NEXT: andb $1, %al -; AVX512BW-NEXT: negb %al -; AVX512BW-NEXT: # kill: def $al killed $al killed $eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: andb $1, %dl -; AVX512BW-NEXT: negb %dl -; AVX512BW-NEXT: idivb %dl -; AVX512BW-NEXT: movl %eax, %edx -; AVX512BW-NEXT: andb $1, %sil -; AVX512BW-NEXT: negb %sil -; AVX512BW-NEXT: movl %esi, %eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: andb $1, %dil -; AVX512BW-NEXT: negb %dil -; AVX512BW-NEXT: idivb %dil -; AVX512BW-NEXT: movl %eax, %esi -; AVX512BW-NEXT: andb $1, %cl -; AVX512BW-NEXT: negb %cl -; AVX512BW-NEXT: movl %ecx, %eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: kmovd %k2, %ecx -; AVX512BW-NEXT: andb $1, %cl -; AVX512BW-NEXT: negb %cl -; AVX512BW-NEXT: idivb %cl -; AVX512BW-NEXT: movl %eax, %ecx -; AVX512BW-NEXT: kmovd %k1, %eax -; AVX512BW-NEXT: andb $1, %al -; AVX512BW-NEXT: negb %al -; AVX512BW-NEXT: # kill: def $al killed $al killed $eax -; AVX512BW-NEXT: cbtw -; AVX512BW-NEXT: kmovd %k0, %edi -; AVX512BW-NEXT: andb $1, %dil -; AVX512BW-NEXT: negb %dil -; AVX512BW-NEXT: idivb %dil -; AVX512BW-NEXT: # kill: def $al killed $al def $eax -; AVX512BW-NEXT: kmovd %edx, %k0 -; AVX512BW-NEXT: kmovd %esi, %k1 -; AVX512BW-NEXT: kshiftrw $1, %k1, %k2 -; AVX512BW-NEXT: kxorw %k0, %k2, %k0 -; AVX512BW-NEXT: kshiftlw $15, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $14, %k0, %k0 -; AVX512BW-NEXT: kxorw %k0, %k1, %k0 -; AVX512BW-NEXT: kshiftrw $2, %k0, %k1 -; AVX512BW-NEXT: kmovd %ecx, %k2 -; AVX512BW-NEXT: kxorw %k2, %k1, %k1 -; AVX512BW-NEXT: kshiftlw $15, %k1, %k1 -; AVX512BW-NEXT: kshiftrw $13, %k1, %k1 -; AVX512BW-NEXT: kxorw %k1, %k0, %k0 -; AVX512BW-NEXT: kshiftlw $13, %k0, %k0 -; AVX512BW-NEXT: kshiftrw $13, %k0, %k0 -; AVX512BW-NEXT: kmovd %eax, %k1 -; AVX512BW-NEXT: kshiftlw $3, %k1, %k1 -; AVX512BW-NEXT: korw %k1, %k0, %k1 -; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} -; AVX512BW-NEXT: retq -; -; XOP-LABEL: boolvec_sdiv: -; XOP: # %bb.0: -; XOP-NEXT: vpslld $31, %xmm1, %xmm1 -; XOP-NEXT: vpsrad $31, %xmm1, %xmm1 -; XOP-NEXT: vpslld $31, %xmm0, %xmm0 -; XOP-NEXT: vpsrad $31, %xmm0, %xmm0 -; XOP-NEXT: vpextrd $1, %xmm0, %eax -; XOP-NEXT: vpextrd $1, %xmm1, %ecx -; XOP-NEXT: cltd -; XOP-NEXT: idivl %ecx -; XOP-NEXT: movl %eax, %ecx -; XOP-NEXT: vmovd %xmm0, %eax -; XOP-NEXT: vmovd %xmm1, %esi -; XOP-NEXT: cltd -; XOP-NEXT: idivl %esi -; XOP-NEXT: vmovd %eax, %xmm2 -; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $2, %xmm0, %eax -; XOP-NEXT: vpextrd $2, %xmm1, %ecx -; XOP-NEXT: cltd -; XOP-NEXT: idivl %ecx -; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $3, %xmm0, %eax -; XOP-NEXT: vpextrd $3, %xmm1, %ecx -; XOP-NEXT: cltd -; XOP-NEXT: idivl %ecx -; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; XOP-NEXT: retq +; CHECK-LABEL: boolvec_sdiv: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = sdiv <4 x i1> %x, %y ret <4 x i1> %r } Index: llvm/trunk/test/CodeGen/X86/combine-srem.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-srem.ll +++ llvm/trunk/test/CodeGen/X86/combine-srem.ll @@ -462,16 +462,7 @@ define i1 @bool_srem(i1 %x, i1 %y) { ; CHECK-LABEL: bool_srem: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: negb %al -; CHECK-NEXT: # kill: def $al killed $al killed $eax -; CHECK-NEXT: cbtw -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: negb %sil -; CHECK-NEXT: idivb %sil -; CHECK-NEXT: movsbl %ah, %eax -; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %r = srem i1 %x, %y ret i1 %r @@ -479,61 +470,12 @@ define <4 x i1> @boolvec_srem(<4 x i1> %x, <4 x i1> %y) { ; SSE-LABEL: boolvec_srem: ; SSE: # %bb.0: -; SSE-NEXT: pslld $31, %xmm1 -; SSE-NEXT: psrad $31, %xmm1 -; SSE-NEXT: pslld $31, %xmm0 -; SSE-NEXT: psrad $31, %xmm0 -; SSE-NEXT: pextrd $1, %xmm0, %eax -; SSE-NEXT: pextrd $1, %xmm1, %ecx -; SSE-NEXT: cltd -; SSE-NEXT: idivl %ecx -; SSE-NEXT: movl %edx, %ecx -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movd %xmm1, %esi -; SSE-NEXT: cltd -; SSE-NEXT: idivl %esi -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE-NEXT: pextrd $2, %xmm0, %eax -; SSE-NEXT: pextrd $2, %xmm1, %ecx -; SSE-NEXT: cltd -; SSE-NEXT: idivl %ecx -; SSE-NEXT: pinsrd $2, %edx, %xmm2 -; SSE-NEXT: pextrd $3, %xmm0, %eax -; SSE-NEXT: pextrd $3, %xmm1, %ecx -; SSE-NEXT: cltd -; SSE-NEXT: idivl %ecx -; SSE-NEXT: pinsrd $3, %edx, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: boolvec_srem: ; AVX: # %bb.0: -; AVX-NEXT: vpslld $31, %xmm1, %xmm1 -; AVX-NEXT: vpsrad $31, %xmm1, %xmm1 -; AVX-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 -; AVX-NEXT: vpextrd $1, %xmm0, %eax -; AVX-NEXT: vpextrd $1, %xmm1, %ecx -; AVX-NEXT: cltd -; AVX-NEXT: idivl %ecx -; AVX-NEXT: movl %edx, %ecx -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: vmovd %xmm1, %esi -; AVX-NEXT: cltd -; AVX-NEXT: idivl %esi -; AVX-NEXT: vmovd %edx, %xmm2 -; AVX-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX-NEXT: vpextrd $2, %xmm0, %eax -; AVX-NEXT: vpextrd $2, %xmm1, %ecx -; AVX-NEXT: cltd -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; AVX-NEXT: vpextrd $3, %xmm0, %eax -; AVX-NEXT: vpextrd $3, %xmm1, %ecx -; AVX-NEXT: cltd -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %r = srem <4 x i1> %x, %y ret <4 x i1> %r Index: llvm/trunk/test/CodeGen/X86/combine-udiv.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-udiv.ll +++ llvm/trunk/test/CodeGen/X86/combine-udiv.ll @@ -911,166 +911,17 @@ define i1 @bool_udiv(i1 %x, i1 %y) { ; CHECK-LABEL: bool_udiv: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: # kill: def $eax killed $eax def $ax -; CHECK-NEXT: divb %sil +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r = udiv i1 %x, %y ret i1 %r } define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) { -; SSE2-LABEL: boolvec_udiv: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pand %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3] -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] -; SSE2-NEXT: movd %xmm3, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm3 -; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %eax -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; SSE2-NEXT: movdqa %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSE41-LABEL: boolvec_udiv: -; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; SSE41-NEXT: pand %xmm2, %xmm1 -; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: pextrd $1, %xmm0, %eax -; SSE41-NEXT: pextrd $1, %xmm1, %ecx -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: movd %xmm0, %eax -; SSE41-NEXT: movd %xmm1, %esi -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %esi -; SSE41-NEXT: movd %eax, %xmm2 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE41-NEXT: pextrd $2, %xmm0, %eax -; SSE41-NEXT: pextrd $2, %xmm1, %ecx -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: pinsrd $2, %eax, %xmm2 -; SSE41-NEXT: pextrd $3, %xmm0, %eax -; SSE41-NEXT: pextrd $3, %xmm1, %ecx -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: pinsrd $3, %eax, %xmm2 -; SSE41-NEXT: movdqa %xmm2, %xmm0 -; SSE41-NEXT: retq -; -; AVX1-LABEL: boolvec_udiv: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpextrd $1, %xmm0, %eax -; AVX1-NEXT: vpextrd $1, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: movl %eax, %ecx -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: vmovd %xmm1, %esi -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %esi -; AVX1-NEXT: vmovd %eax, %xmm2 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $2, %xmm0, %eax -; AVX1-NEXT: vpextrd $2, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vpextrd $3, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: boolvec_udiv: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpextrd $1, %xmm0, %eax -; AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: movl %eax, %ecx -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: vmovd %xmm1, %esi -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %esi -; AVX2-NEXT: vmovd %eax, %xmm2 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $2, %xmm0, %eax -; AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; AVX2-NEXT: retq -; -; XOP-LABEL: boolvec_udiv: -; XOP: # %bb.0: -; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] -; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1 -; XOP-NEXT: vpand %xmm2, %xmm0, %xmm0 -; XOP-NEXT: vpextrd $1, %xmm0, %eax -; XOP-NEXT: vpextrd $1, %xmm1, %ecx -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: movl %eax, %ecx -; XOP-NEXT: vmovd %xmm0, %eax -; XOP-NEXT: vmovd %xmm1, %esi -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %esi -; XOP-NEXT: vmovd %eax, %xmm2 -; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $2, %xmm0, %eax -; XOP-NEXT: vpextrd $2, %xmm1, %ecx -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2 -; XOP-NEXT: vpextrd $3, %xmm0, %eax -; XOP-NEXT: vpextrd $3, %xmm1, %ecx -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0 -; XOP-NEXT: retq +; CHECK-LABEL: boolvec_udiv: +; CHECK: # %bb.0: +; CHECK-NEXT: retq %r = udiv <4 x i1> %x, %y ret <4 x i1> %r } Index: llvm/trunk/test/CodeGen/X86/combine-urem.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/combine-urem.ll +++ llvm/trunk/test/CodeGen/X86/combine-urem.ll @@ -383,13 +383,7 @@ define i1 @bool_urem(i1 %x, i1 %y) { ; CHECK-LABEL: bool_urem: ; CHECK: # %bb.0: -; CHECK-NEXT: andb $1, %sil -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: movzbl %dil, %eax -; CHECK-NEXT: # kill: def $eax killed $eax def $ax -; CHECK-NEXT: divb %sil -; CHECK-NEXT: movzbl %ah, %eax -; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: retq %r = urem i1 %x, %y ret i1 %r @@ -398,88 +392,13 @@ define <4 x i1> @boolvec_urem(<4 x i1> %x, <4 x i1> %y) { ; SSE-LABEL: boolvec_urem: ; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: pextrd $1, %xmm0, %eax -; SSE-NEXT: pextrd $1, %xmm1, %ecx -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: movl %edx, %ecx -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: movd %xmm1, %esi -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %esi -; SSE-NEXT: movd %edx, %xmm2 -; SSE-NEXT: pinsrd $1, %ecx, %xmm2 -; SSE-NEXT: pextrd $2, %xmm0, %eax -; SSE-NEXT: pextrd $2, %xmm1, %ecx -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: pinsrd $2, %edx, %xmm2 -; SSE-NEXT: pextrd $3, %xmm0, %eax -; SSE-NEXT: pextrd $3, %xmm1, %ecx -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: pinsrd $3, %edx, %xmm2 -; SSE-NEXT: movdqa %xmm2, %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: boolvec_urem: -; AVX1: # %bb.0: -; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1] -; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpextrd $1, %xmm0, %eax -; AVX1-NEXT: vpextrd $1, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: movl %edx, %ecx -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: vmovd %xmm1, %esi -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %esi -; AVX1-NEXT: vmovd %edx, %xmm2 -; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $2, %xmm0, %eax -; AVX1-NEXT: vpextrd $2, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; AVX1-NEXT: vpextrd $3, %xmm0, %eax -; AVX1-NEXT: vpextrd $3, %xmm1, %ecx -; AVX1-NEXT: xorl %edx, %edx -; AVX1-NEXT: divl %ecx -; AVX1-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: boolvec_urem: -; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX2-NEXT: vpextrd $1, %xmm0, %eax -; AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: movl %edx, %ecx -; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: vmovd %xmm1, %esi -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %esi -; AVX2-NEXT: vmovd %edx, %xmm2 -; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $2, %xmm0, %eax -; AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; AVX2-NEXT: vpextrd $3, %xmm0, %eax -; AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; AVX2-NEXT: xorl %edx, %edx -; AVX2-NEXT: divl %ecx -; AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 -; AVX2-NEXT: retq +; AVX-LABEL: boolvec_urem: +; AVX: # %bb.0: +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX-NEXT: retq %r = urem <4 x i1> %x, %y ret <4 x i1> %r } Index: llvm/trunk/test/CodeGen/X86/pr38539.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr38539.ll +++ llvm/trunk/test/CodeGen/X86/pr38539.ll @@ -6,68 +6,13 @@ define void @f() { ; X64-LABEL: f: ; X64: # %bb.0: # %BB -; X64-NEXT: pushq %rbp -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: pushq %r14 -; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %rbx -; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: subq $16, %rsp -; X64-NEXT: .cfi_def_cfa_offset 48 -; X64-NEXT: .cfi_offset %rbx, -32 -; X64-NEXT: .cfi_offset %r14, -24 -; X64-NEXT: .cfi_offset %rbp, -16 -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; X64-NEXT: movq (%rsp), %rbx ; X64-NEXT: movb (%rax), %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: # kill: def $eax killed $eax def $ax -; X64-NEXT: divb (%rax) -; X64-NEXT: movl %eax, %r14d -; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: shlq $62, %rcx -; X64-NEXT: sarq $62, %rcx -; X64-NEXT: xorl %edi, %edi -; X64-NEXT: xorl %esi, %esi -; X64-NEXT: movq %rbx, %rdx -; X64-NEXT: callq __modti3 -; X64-NEXT: andl $3, %edx +; X64-NEXT: movb (%rax), %al ; X64-NEXT: testb %al, %al ; X64-NEXT: setne (%rax) -; X64-NEXT: cmpq %rax, %rbx -; X64-NEXT: sbbq %rdx, %rbp -; X64-NEXT: setae %dl -; X64-NEXT: sbbb %cl, %cl -; X64-NEXT: testb %al, %al -; X64-NEXT: setne %bl -; X64-NEXT: negb %dl -; X64-NEXT: cmpb %r14b, %al -; X64-NEXT: setle %al -; X64-NEXT: negb %al -; X64-NEXT: cbtw -; X64-NEXT: idivb %dl -; X64-NEXT: movsbl %ah, %eax -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: andl $1, %eax -; X64-NEXT: shlq $4, %rax -; X64-NEXT: negq %rax -; X64-NEXT: negb %bl -; X64-NEXT: leaq -16(%rsp,%rax), %rax +; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; X64-NEXT: movq %rax, (%rax) -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: cbtw -; X64-NEXT: idivb %bl -; X64-NEXT: movsbl %ah, %eax -; X64-NEXT: andb $1, %al -; X64-NEXT: movb %al, (%rax) -; X64-NEXT: addq $16, %rsp -; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: popq %rbx -; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: popq %r14 -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: popq %rbp -; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: movb $0, (%rax) ; X64-NEXT: retq ; ; X86-LABEL: f: @@ -77,75 +22,16 @@ ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $48, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: subl $16, %esp +; X86-NEXT: movb (%eax), %al ; X86-NEXT: movb (%eax), %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: # kill: def $eax killed $eax def $ax -; X86-NEXT: divb (%eax) -; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: movl %esi, %eax -; X86-NEXT: shll $30, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: sarl $30, %ecx -; X86-NEXT: sarl $31, %eax -; X86-NEXT: leal {{[0-9]+}}(%esp), %edx -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %edx -; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: setae %dl -; X86-NEXT: sbbb %cl, %cl ; X86-NEXT: testb %al, %al -; X86-NEXT: setne %ch ; X86-NEXT: setne (%eax) -; X86-NEXT: negb %ch -; X86-NEXT: negb %dl -; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X86-NEXT: setle %al -; X86-NEXT: negb %al -; X86-NEXT: cbtw -; X86-NEXT: idivb %dl -; X86-NEXT: movsbl %ah, %eax -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: negl %eax -; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: leal -4(%esp,%eax,4), %eax +; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%eax) -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cbtw -; X86-NEXT: idivb %ch -; X86-NEXT: movsbl %ah, %eax -; X86-NEXT: andb $1, %al -; X86-NEXT: movb %al, (%eax) -; X86-NEXT: leal -12(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: movb $0, (%eax) +; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl @@ -177,50 +63,13 @@ define void @g() { ; X64-LABEL: g: ; X64: # %bb.0: # %BB -; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi -; X64-NEXT: shlq $32, %rsi -; X64-NEXT: orq %rax, %rsi -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: shlq $30, %rdi -; X64-NEXT: sarq $30, %rdi ; X64-NEXT: movb (%rax), %al -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: # kill: def $eax killed $eax def $ax -; X64-NEXT: divb (%rax) -; X64-NEXT: movl %eax, %r8d -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: idivq %rdi -; X64-NEXT: movabsq $17179869183, %rax # imm = 0x3FFFFFFFF -; X64-NEXT: andq %rdx, %rax +; X64-NEXT: movb (%rax), %al ; X64-NEXT: testb %al, %al -; X64-NEXT: setne %dil ; X64-NEXT: setne (%rax) -; X64-NEXT: cmpq %rsi, %rax -; X64-NEXT: seta %dl -; X64-NEXT: setbe %cl -; X64-NEXT: negb %cl -; X64-NEXT: cmpb %r8b, %al -; X64-NEXT: setle %al -; X64-NEXT: negb %al -; X64-NEXT: cbtw -; X64-NEXT: idivb %cl -; X64-NEXT: movsbl %ah, %eax -; X64-NEXT: movzbl %al, %eax -; X64-NEXT: andl $1, %eax -; X64-NEXT: shlq $3, %rax -; X64-NEXT: negq %rax -; X64-NEXT: negb %dil -; X64-NEXT: negb %dl -; X64-NEXT: leaq -16(%rsp,%rax), %rax +; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; X64-NEXT: movq %rax, (%rax) -; X64-NEXT: movl %edx, %eax -; X64-NEXT: cbtw -; X64-NEXT: idivb %dil -; X64-NEXT: movsbl %ah, %eax -; X64-NEXT: andb $1, %al -; X64-NEXT: movb %al, (%rax) +; X64-NEXT: movb $0, (%rax) ; X64-NEXT: retq ; ; X86-LABEL: g: @@ -230,63 +79,16 @@ ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl (%esp), %edi +; X86-NEXT: subl $8, %esp +; X86-NEXT: movb (%eax), %al ; X86-NEXT: movb (%eax), %al -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: # kill: def $eax killed $eax def $ax -; X86-NEXT: divb (%eax) -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: shll $30, %eax -; X86-NEXT: sarl $30, %eax -; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: calll __moddi3 -; X86-NEXT: addl $16, %esp -; X86-NEXT: andl $3, %edx ; X86-NEXT: testb %al, %al ; X86-NEXT: setne (%eax) -; X86-NEXT: cmpl %eax, %edi -; X86-NEXT: sbbl %edx, %esi -; X86-NEXT: setae %dl -; X86-NEXT: sbbb %cl, %cl -; X86-NEXT: testb %al, %al -; X86-NEXT: setne %ch -; X86-NEXT: negb %dl -; X86-NEXT: cmpb %bl, %al -; X86-NEXT: setle %al -; X86-NEXT: negb %al -; X86-NEXT: cbtw -; X86-NEXT: idivb %dl -; X86-NEXT: movsbl %ah, %eax -; X86-NEXT: movzbl %al, %eax -; X86-NEXT: andl $1, %eax -; X86-NEXT: shll $3, %eax -; X86-NEXT: negl %eax -; X86-NEXT: negb %ch -; X86-NEXT: leal -8(%esp,%eax), %eax +; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%eax) -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cbtw -; X86-NEXT: idivb %ch -; X86-NEXT: movsbl %ah, %eax -; X86-NEXT: andb $1, %al -; X86-NEXT: movb %al, (%eax) -; X86-NEXT: leal -12(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: movb $0, (%eax) +; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl