Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3128,8 +3128,11 @@
   if (N0.isUndef())
     return DAG.getConstant(0, DL, VT);

-  // TODO: 0 / X -> 0
-  // TODO: 0 % X -> 0
+  // 0 / X -> 0
+  // 0 % X -> 0
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  if (N0C && N0C->isNullValue())
+    return N0;

   // X / X -> 1
   // X % X -> 0
Index: test/CodeGen/X86/combine-sdiv.ll
===================================================================
--- test/CodeGen/X86/combine-sdiv.ll
+++ test/CodeGen/X86/combine-sdiv.ll
@@ -107,99 +107,25 @@
   ret <4 x i32> %1
 }

-; TODO fold (sdiv 0, x) -> 0
+; fold (sdiv 0, x) -> 0
 define i32 @combine_sdiv_zero(i32 %x) {
 ; CHECK-LABEL: combine_sdiv_zero:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: idivl %edi
 ; CHECK-NEXT: retq
   %1 = sdiv i32 0, %x
   ret i32 %1
 }

 define <4 x i32> @combine_vec_sdiv_zero(<4 x i32> %x) {
-; SSE2-LABEL: combine_vec_sdiv_zero:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE-LABEL: combine_vec_sdiv_zero:
+; SSE: # %bb.0:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
 ;
-; SSE41-LABEL: combine_vec_sdiv_zero:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pextrd $1, %xmm0, %ecx
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: idivl %ecx
-; SSE41-NEXT: movl %eax, %ecx
-; SSE41-NEXT: movd %xmm0, %esi
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: idivl %esi
-; SSE41-NEXT: movd %eax, %xmm1
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE41-NEXT: pextrd $2, %xmm0, %ecx
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: idivl %ecx
-; SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; SSE41-NEXT: pextrd $3, %xmm0, %ecx
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: idivl %ecx
-; SSE41-NEXT: pinsrd $3, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
 ; AVX-LABEL: combine_vec_sdiv_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: vmovd %xmm0, %esi
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = sdiv <4 x i32> zeroinitializer, %x
   ret <4 x i32> %1
Index: test/CodeGen/X86/combine-srem.ll
===================================================================
--- test/CodeGen/X86/combine-srem.ll
+++ test/CodeGen/X86/combine-srem.ll
@@ -100,14 +100,11 @@
   ret <4 x i32> %1
 }

-; TODO fold (srem 0, x) -> 0
+; fold (srem 0, x) -> 0
 define i32 @combine_srem_zero(i32 %x) {
 ; CHECK-LABEL: combine_srem_zero:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: idivl %edi
-; CHECK-NEXT: movl %edx, %eax
 ; CHECK-NEXT: retq
   %1 = srem i32 0, %x
   ret i32 %1
@@ -116,53 +113,12 @@
 define <4 x i32> @combine_vec_srem_zero(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_srem_zero:
 ; SSE: # %bb.0:
-; SSE-NEXT: pextrd $1, %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: movl %edx, %ecx
-; SSE-NEXT: movd %xmm0, %esi
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: idivl %esi
-; SSE-NEXT: movd %edx, %xmm1
-; SSE-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE-NEXT: pextrd $2, %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: pinsrd $2, %edx, %xmm1
-; SSE-NEXT: pextrd $3, %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: pinsrd $3, %edx, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_srem_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: movl %edx, %ecx
-; AVX-NEXT: vmovd %xmm0, %esi
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $3, %edx, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = srem <4 x i32> zeroinitializer, %x
   ret <4 x i32> %1
Index: test/CodeGen/X86/combine-udiv.ll
===================================================================
--- test/CodeGen/X86/combine-udiv.ll
+++ test/CodeGen/X86/combine-udiv.ll
@@ -90,124 +90,30 @@
   ret <4 x i32> %1
 }

-; TODO fold (udiv 0, x) -> 0
+; fold (udiv 0, x) -> 0
 define i32 @combine_udiv_zero(i32 %x) {
 ; CHECK-LABEL: combine_udiv_zero:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: divl %edi
 ; CHECK-NEXT: retq
   %1 = udiv i32 0, %x
   ret i32 %1
 }

 define <4 x i32> @combine_vec_udiv_zero(<4 x i32> %x) {
-; SSE2-LABEL: combine_vec_udiv_zero:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %ecx
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %ecx
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %ecx
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: xorl %eax, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %ecx
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE-LABEL: combine_vec_udiv_zero:
+; SSE: # %bb.0:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
 ;
-; SSE41-LABEL: combine_vec_udiv_zero:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pextrd $1, %xmm0, %ecx
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %ecx
-; SSE41-NEXT: movl %eax, %ecx
-; SSE41-NEXT: movd %xmm0, %esi
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %esi
-; SSE41-NEXT: movd %eax, %xmm1
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE41-NEXT: pextrd $2, %xmm0, %ecx
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %ecx
-; SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; SSE41-NEXT: pextrd $3, %xmm0, %ecx
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %ecx
-; SSE41-NEXT: pinsrd $3, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: retq
-;
 ; AVX-LABEL: combine_vec_udiv_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: vmovd %xmm0, %esi
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %esi
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; XOP-LABEL: combine_vec_udiv_zero:
 ; XOP: # %bb.0:
-; XOP-NEXT: vpextrd $1, %xmm0, %ecx
-; XOP-NEXT: xorl %eax, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %ecx
-; XOP-NEXT: movl %eax, %ecx
-; XOP-NEXT: vmovd %xmm0, %esi
-; XOP-NEXT: xorl %eax, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %esi
-; XOP-NEXT: vmovd %eax, %xmm1
-; XOP-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; XOP-NEXT: vpextrd $2, %xmm0, %ecx
-; XOP-NEXT: xorl %eax, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %ecx
-; XOP-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; XOP-NEXT: vpextrd $3, %xmm0, %ecx
-; XOP-NEXT: xorl %eax, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %ecx
-; XOP-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; XOP-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; XOP-NEXT: retq
   %1 = udiv <4 x i32> zeroinitializer, %x
   ret <4 x i32> %1
Index: test/CodeGen/X86/combine-urem.ll
===================================================================
--- test/CodeGen/X86/combine-urem.ll
+++ test/CodeGen/X86/combine-urem.ll
@@ -89,14 +89,11 @@
   ret <4 x i32> %1
 }

-; TODO fold (urem 0, x) -> 0
+; fold (urem 0, x) -> 0
 define i32 @combine_urem_zero(i32 %x) {
 ; CHECK-LABEL: combine_urem_zero:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: divl %edi
-; CHECK-NEXT: movl %edx, %eax
 ; CHECK-NEXT: retq
   %1 = urem i32 0, %x
   ret i32 %1
@@ -105,53 +102,12 @@
 define <4 x i32> @combine_vec_urem_zero(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_urem_zero:
 ; SSE: # %bb.0:
-; SSE-NEXT: pextrd $1, %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %ecx
-; SSE-NEXT: movl %edx, %ecx
-; SSE-NEXT: movd %xmm0, %esi
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %esi
-; SSE-NEXT: movd %edx, %xmm1
-; SSE-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE-NEXT: pextrd $2, %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %ecx
-; SSE-NEXT: pinsrd $2, %edx, %xmm1
-; SSE-NEXT: pextrd $3, %xmm0, %ecx
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %ecx
-; SSE-NEXT: pinsrd $3, %edx, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_urem_zero:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: movl %edx, %ecx
-; AVX-NEXT: vmovd %xmm0, %esi
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %esi
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %ecx
-; AVX-NEXT: vpinsrd $3, %edx, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = urem <4 x i32> zeroinitializer, %x
   ret <4 x i32> %1
Index: test/CodeGen/X86/copy-eflags.ll
===================================================================
--- test/CodeGen/X86/copy-eflags.ll
+++ test/CodeGen/X86/copy-eflags.ll
@@ -215,76 +215,49 @@
 ; X32-NEXT: .cfi_offset %edi, -16
 ; X32-NEXT: .cfi_offset %ebx, -12
 ; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: movb {{[0-9]+}}(%esp), %bl
+; X32-NEXT: movb {{[0-9]+}}(%esp), %bh
 ; X32-NEXT: jmp .LBB3_1
 ; X32-NEXT: .p2align 4, 0x90
-; X32-NEXT: .LBB3_5: # %bb1
+; X32-NEXT: .LBB3_3: # %bb1
 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: idivl %ebp
+; X32-NEXT: movb %bh, (%esi)
+; X32-NEXT: movl (%edx), %edi
 ; X32-NEXT: .LBB3_1: # %bb1
 ; X32-NEXT: # =>This Inner Loop Header: Depth=1
-; X32-NEXT: movsbl %cl, %eax
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: sarl $31, %edx
-; X32-NEXT: cmpl %eax, %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: sbbl %edx, %eax
-; X32-NEXT: setl %al
-; X32-NEXT: setl %dl
-; X32-NEXT: movzbl %dl, %ebp
-; X32-NEXT: negl %ebp
-; X32-NEXT: testb %al, %al
-; X32-NEXT: jne .LBB3_3
+; X32-NEXT: movsbl %bh, %edi
+; X32-NEXT: movl %edi, %ebp
+; X32-NEXT: sarl $31, %ebp
+; X32-NEXT: cmpl %edi, %ecx
+; X32-NEXT: movl %eax, %edi
+; X32-NEXT: sbbl %ebp, %edi
+; X32-NEXT: jl .LBB3_3
 ; X32-NEXT: # %bb.2: # %bb1
 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
-; X32-NEXT: movb %ch, %cl
-; X32-NEXT: .LBB3_3: # %bb1
-; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
-; X32-NEXT: movb %cl, (%ebx)
-; X32-NEXT: movl (%edi), %edx
-; X32-NEXT: testb %al, %al
-; X32-NEXT: jne .LBB3_5
-; X32-NEXT: # %bb.4: # %bb1
-; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
-; X32-NEXT: movl %edx, %ebp
-; X32-NEXT: jmp .LBB3_5
+; X32-NEXT: movb %bl, %bh
+; X32-NEXT: jmp .LBB3_3
 ;
 ; X64-LABEL: PR37100:
 ; X64: # %bb.0: # %bb
-; X64-NEXT: movq %rdx, %r10
 ; X64-NEXT: jmp .LBB3_1
 ; X64-NEXT: .p2align 4, 0x90
-; X64-NEXT: .LBB3_5: # %bb1
+; X64-NEXT: .LBB3_3: # %bb1
 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: idivl %esi
+; X64-NEXT: movb %dil, (%r8)
+; X64-NEXT: movl (%r9), %eax
 ; X64-NEXT: .LBB3_1: # %bb1
 ; X64-NEXT: # =>This Inner Loop Header: Depth=1
 ; X64-NEXT: movsbq %dil, %rax
-; X64-NEXT: xorl %esi, %esi
-; X64-NEXT: cmpq %rax, %r10
-; X64-NEXT: setl %sil
-; X64-NEXT: negl %esi
-; X64-NEXT: cmpq %rax, %r10
+; X64-NEXT: cmpq %rax, %rdx
 ; X64-NEXT: jl .LBB3_3
 ; X64-NEXT: # %bb.2: # %bb1
 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
 ; X64-NEXT: movl %ecx, %edi
-; X64-NEXT: .LBB3_3: # %bb1
-; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
-; X64-NEXT: movb %dil, (%r8)
-; X64-NEXT: jl .LBB3_5
-; X64-NEXT: # %bb.4: # %bb1
-; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
-; X64-NEXT: movl (%r9), %esi
-; X64-NEXT: jmp .LBB3_5
+; X64-NEXT: jmp .LBB3_3
 bb:
   br label %bb1
@@ -315,40 +288,27 @@
 ; X32-NEXT: .cfi_def_cfa_offset 8
 ; X32-NEXT: .cfi_offset %esi, -8
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl (%eax), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: sarl $31, %ecx
-; X32-NEXT: cmpl %eax, %eax
-; X32-NEXT: sbbl %ecx, %eax
-; X32-NEXT: setb %al
-; X32-NEXT: sbbb %cl, %cl
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movb %cl, (%edx)
-; X32-NEXT: movzbl %al, %eax
-; X32-NEXT: xorl %ecx, %ecx
-; X32-NEXT: subl %eax, %ecx
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: idivl %ecx
-; X32-NEXT: movb %dl, (%esi)
+; X32-NEXT: movl (%edx), %edx
+; X32-NEXT: movl %edx, %esi
+; X32-NEXT: sarl $31, %esi
+; X32-NEXT: cmpl %edx, %eax
+; X32-NEXT: sbbl %esi, %edx
+; X32-NEXT: sbbb %dl, %dl
+; X32-NEXT: movb %dl, (%ecx)
+; X32-NEXT: movb $0, (%eax)
 ; X32-NEXT: popl %esi
 ; X32-NEXT: .cfi_def_cfa_offset 4
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: PR37431:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: movslq (%rdi), %rax
 ; X64-NEXT: cmpq %rax, %rax
-; X64-NEXT: sbbb %dl, %dl
-; X64-NEXT: cmpq %rax, %rax
-; X64-NEXT: movb %dl, (%rsi)
-; X64-NEXT: sbbl %esi, %esi
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: idivl %esi
-; X64-NEXT: movb %dl, (%rcx)
+; X64-NEXT: sbbb %al, %al
+; X64-NEXT: movb %al, (%rsi)
+; X64-NEXT: movb $0, (%rdx)
 ; X64-NEXT: retq
 entry:
   %tmp = load i32, i32* %arg1
Index: test/CodeGen/X86/pr32282.ll
===================================================================
--- test/CodeGen/X86/pr32282.ll
+++ test/CodeGen/X86/pr32282.ll
@@ -12,33 +12,10 @@
 define void @foo() {
 ; X86-LABEL: foo:
 ; X86: # %bb.0:
-; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: movl d, %eax
-; X86-NEXT: notl %eax
-; X86-NEXT: movl d+4, %ecx
-; X86-NEXT: notl %ecx
-; X86-NEXT: andl $701685459, %ecx # imm = 0x29D2DED3
-; X86-NEXT: andl $-564453154, %eax # imm = 0xDE5B20DE
-; X86-NEXT: shrdl $21, %ecx, %eax
-; X86-NEXT: shrl $21, %ecx
-; X86-NEXT: andl $-2, %eax
-; X86-NEXT: addl $7, %eax
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: pushl %ecx
-; X86-NEXT: .cfi_adjust_cfa_offset 4
-; X86-NEXT: pushl %eax
-; X86-NEXT: .cfi_adjust_cfa_offset 4
-; X86-NEXT: pushl $0
-; X86-NEXT: .cfi_adjust_cfa_offset 4
-; X86-NEXT: pushl $0
-; X86-NEXT: .cfi_adjust_cfa_offset 4
-; X86-NEXT: calll __divdi3
-; X86-NEXT: addl $16, %esp
-; X86-NEXT: .cfi_adjust_cfa_offset -16
-; X86-NEXT: orl %eax, %edx
-; X86-NEXT: setne {{[0-9]+}}(%esp)
-; X86-NEXT: popl %eax
+; X86-NEXT: subl $1, %esp
+; X86-NEXT: .cfi_def_cfa_offset 5
+; X86-NEXT: movb $0, (%esp)
+; X86-NEXT: addl $1, %esp
 ; X86-NEXT: .cfi_def_cfa_offset 4
 ; X86-NEXT: retl
 ;
@@ -46,25 +23,12 @@
 ; X64: # %bb.0:
 ; X64-NEXT: movq {{.*}}(%rip), %rax
 ; X64-NEXT: movabsq $3013716102212485120, %rcx # imm = 0x29D2DED3DE400000
-; X64-NEXT: andnq %rcx, %rax, %rcx
-; X64-NEXT: shrq $21, %rcx
-; X64-NEXT: addq $7, %rcx
-; X64-NEXT: movabsq $4393751543808, %rax # imm = 0x3FF00000000
-; X64-NEXT: testq %rax, %rcx
-; X64-NEXT: je .LBB0_1
-; X64-NEXT: # %bb.2:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divq %rcx
-; X64-NEXT: jmp .LBB0_3
-; X64-NEXT: .LBB0_1:
-; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: divl %ecx
-; X64-NEXT: # kill: def $eax killed $eax def $rax
-; X64-NEXT: .LBB0_3:
-; X64-NEXT: testq %rax, %rax
-; X64-NEXT: setne -{{[0-9]+}}(%rsp)
+; X64-NEXT: andnq %rcx, %rax, %rax
+; X64-NEXT: shrq $21, %rax
+; X64-NEXT: addq $7, %rax
+; X64-NEXT: movabsq $4393751543808, %rcx # imm = 0x3FF00000000
+; X64-NEXT: testq %rcx, %rax
+; X64-NEXT: movb $0, -{{[0-9]+}}(%rsp)
 ; X64-NEXT: retq
   %1 = alloca i8, align 1
   %2 = load i64, i64* @d, align 8