Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3080,12 +3080,15 @@
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
+  unsigned Opc = N->getOpcode();
+  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
+
   // X / undef -> undef
   // X % undef -> undef
   // X / 0 -> undef
   // X % 0 -> undef
   // NOTE: This includes vectors where any divisor element is zero/undef.
-  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
+  if (DAG.isUndef(Opc, {N0, N1}))
     return DAG.getUNDEF(VT);
 
   // undef / X -> 0
@@ -3093,6 +3096,20 @@
   if (N0.isUndef())
     return DAG.getConstant(0, DL, VT);
 
+  // TODO: 0 / X -> 0
+  // TODO: 0 % X -> 0
+
+  // X / X -> 1
+  // X % X -> 0
+  if (N0 == N1)
+    return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
+
+  // TODO: X / 1 -> X
+  // TODO: X % 1 -> 0
+  // If this is a boolean op (single-bit element type), we can't have
+  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
+  // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
+
   return SDValue();
 }
 
Index: test/CodeGen/MSP430/libcalls.ll
===================================================================
--- test/CodeGen/MSP430/libcalls.ll
+++ test/CodeGen/MSP430/libcalls.ll
@@ -433,9 +433,10 @@
 ; CHECK: call #__mspabi_divi
 
   %0 = load volatile i16, i16* @g_i16, align 8
-  %1 = sdiv i16 %0, %0
+  %1 = load volatile i16, i16* @g_i16, align 8
+  %2 = sdiv i16 %0, %1
 
-  ret i16 %1
+  ret i16 %2
 }
 
 define i32 @divli() #0 {
@@ -444,9 +445,10 @@
 ; CHECK: call #__mspabi_divli
 
   %0 = load volatile i32, i32* @g_i32, align 8
-  %1 = sdiv i32 %0, %0
+  %1 = load volatile i32, i32* @g_i32, align 8
+  %2 = sdiv i32 %0, %1
 
-  ret i32 %1
+  ret i32 %2
 }
 
 define i64 @divlli() #0 {
@@ -455,9 +457,10 @@
 ; CHECK: call #__mspabi_divlli
 
   %0 = load volatile i64, i64* @g_i64, align 8
-  %1 = sdiv i64 %0, %0
+  %1 = load volatile i64, i64* @g_i64, align 8
+  %2 = sdiv i64 %0, %1
 
-  ret i64 %1
+  ret i64 %2
 }
 
 define i16 @divu() #0 {
@@ -466,9 +469,10 @@
 ; CHECK: call #__mspabi_divu
 
   %0 = load volatile i16, i16* @g_i16, align 8
-  %1 = udiv i16 %0, %0
+  %1 = load volatile i16, i16* @g_i16, align 8
+  %2 = udiv i16 %0, %1
 
-  ret i16 %1
+  ret i16 %2
 }
 
 define i32 @divul() #0 {
@@ -477,9 +481,10 @@
 ; CHECK: call #__mspabi_divul
 
   %0 = load volatile i32, i32* @g_i32, align 8
-  %1 = udiv i32 %0, %0
+  %1 = load volatile i32, i32* @g_i32, align 8
+  %2 = udiv i32 %0, %1
 
-  ret i32 %1
+  ret i32 %2
 }
 
 define i64 @divull() #0 {
@@ -488,9 +493,10 @@
 ; CHECK: call #__mspabi_divull
 
   %0 = load volatile i64, i64* @g_i64, align 8
-  %1 = udiv i64 %0, %0
+  %1 = load volatile i64, i64* @g_i64, align 8
+  %2 = udiv i64 %0, %1
 
-  ret i64 %1
+  ret i64 %2
 }
 
 define i16 @remi() #0 {
@@ -499,9 +505,10 @@
 ; CHECK: call #__mspabi_remi
 
   %0 = load volatile i16, i16* @g_i16, align 8
-  %1 = srem i16 %0, %0
+  %1 = load volatile i16, i16* @g_i16, align 8
+  %2 = srem i16 %0, %1
 
-  ret i16 %1
+  ret i16 %2
 }
 
 define i32 @remli() #0 {
@@ -510,9 +517,10 @@
 ; CHECK: call #__mspabi_remli
 
   %0 = load volatile i32, i32* @g_i32, align 8
-  %1 = srem i32 %0, %0
+  %1 = load volatile i32, i32* @g_i32, align 8
+  %2 = srem i32 %0, %1
 
-  ret i32 %1
+  ret i32 %2
 }
 
 define i64 @remlli() #0 {
@@ -521,9 +529,10 @@
 ; CHECK: call #__mspabi_remlli
 
   %0 = load volatile i64, i64* @g_i64, align 8
-  %1 = srem i64 %0, %0
+  %1 = load volatile i64, i64* @g_i64, align 8
+  %2 = srem i64 %0, %1
 
-  ret i64 %1
+  ret i64 %2
 }
 
 define i16 @remu() #0 {
@@ -532,9 +541,10 @@
 ; CHECK: call #__mspabi_remu
 
   %0 = load volatile i16, i16* @g_i16, align 8
-  %1 = urem i16 %0, %0
+  %1 = load volatile i16, i16* @g_i16, align 8
+  %2 = urem i16 %0, %1
 
-  ret i16 %1
+  ret i16 %2
 }
 
 define i32 @remul() #0 {
@@ -543,9 +553,10 @@
 ; CHECK: call #__mspabi_remul
 
   %0 = load volatile i32, i32* @g_i32, align 8
-  %1 = urem i32 %0, %0
+  %1 = load volatile i32, i32* @g_i32, align 8
+  %2 = urem i32 %0, %1
 
-  ret i32 %1
+  ret i32 %2
 }
 
 define i64 @remull() #0 {
@@ -554,9 +565,10 @@
 ; CHECK: call #__mspabi_remull
 
   %0 = load volatile i64, i64* @g_i64, align 8
-  %1 = urem i64 %0, %0
+  %1 = load volatile i64, i64* @g_i64, align 8
+  %2 = urem i64 %0, %1
 
-  ret i64 %1
+  ret i64 %2
 }
 
 define i16 @mpyi() #0 {
Index: test/CodeGen/SystemZ/pr32372.ll
===================================================================
--- test/CodeGen/SystemZ/pr32372.ll
+++ test/CodeGen/SystemZ/pr32372.ll
@@ -4,17 +4,18 @@
 define void @pr32372(i8*) {
 ; CHECK-LABEL: pr32372:
 ; CHECK: # %bb.0: # %BB
-; CHECK-NEXT: llc %r1, 0(%r2)
+; CHECK-NEXT: llc %r0, 0(%r2)
 ; CHECK-NEXT: mvhhi 0(%r1), -3825
-; CHECK-NEXT: llill %r0, 0
-; CHECK-NEXT: dlr %r0, %r1
+; CHECK-NEXT: llc %r3, 0(%r2)
+; CHECK-NEXT: llill %r2, 0
+; CHECK-NEXT: dlr %r2, %r0
 ; CHECK-NEXT: .LBB0_1: # %CF251
 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: j .LBB0_1
 BB:
-  %L = load i8, i8* %0
+  %L = load volatile i8, i8* %0
   store i16 -3825, i16* undef
-  %L5 = load i8, i8* %0
+  %L5 = load volatile i8, i8* %0
   %B9 = urem i8 %L5, %L
   %I107 = insertelement <8 x i8> zeroinitializer, i8 %B9, i32 7
   %ZE141 = zext i8 %L5 to i16
Index: test/CodeGen/X86/combine-sdiv.ll
===================================================================
--- test/CodeGen/X86/combine-sdiv.ll
+++ test/CodeGen/X86/combine-sdiv.ll
@@ -205,100 +205,36 @@
   ret <4 x i32> %1
 }
 
-; TODO fold (sdiv x, x) -> 1
+; fold (sdiv x, x) -> 1
 define i32 @combine_sdiv_dupe(i32 %x) {
 ; CHECK-LABEL: combine_sdiv_dupe:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: cltd
-; CHECK-NEXT: idivl %edi
+; CHECK-NEXT: movl $1, %eax
 ; CHECK-NEXT: retq
   %1 = sdiv i32 %x, %x
   ret i32 %1
 }
 
 define <4 x i32> @combine_vec_sdiv_dupe(<4 x i32> %x) {
-; SSE2-LABEL: combine_vec_sdiv_dupe:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE2-NEXT: movd %xmm1, %ecx
-; SSE2-NEXT: movl %ecx, %eax
-; SSE2-NEXT: cltd
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %ecx
-; SSE2-NEXT: movl %ecx, %eax
-; SSE2-NEXT: cltd
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: movl %ecx, %eax
-; SSE2-NEXT: cltd
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %ecx
-; SSE2-NEXT: movl %ecx, %eax
-; SSE2-NEXT: cltd
-; SSE2-NEXT: idivl %ecx
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE-LABEL: combine_vec_sdiv_dupe:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: retq
 ;
-; SSE41-LABEL: combine_vec_sdiv_dupe:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pextrd $1, %xmm0, %ecx
-; SSE41-NEXT: movl %ecx, %eax
-; SSE41-NEXT: cltd
-; SSE41-NEXT: idivl %ecx
-; SSE41-NEXT: movl %eax, %ecx
-; SSE41-NEXT: movd %xmm0, %esi
-; SSE41-NEXT: movl %esi, %eax
-; SSE41-NEXT: cltd
-; SSE41-NEXT: idivl %esi
-; SSE41-NEXT: movd %eax, %xmm1
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE41-NEXT: pextrd $2, %xmm0, %ecx
-; SSE41-NEXT: movl %ecx, %eax
-; SSE41-NEXT: cltd
-; SSE41-NEXT: idivl %ecx
-; SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; SSE41-NEXT: pextrd $3, %xmm0, %ecx
-; SSE41-NEXT: movl %ecx, %eax
-; SSE41-NEXT: cltd
-; SSE41-NEXT: idivl %ecx
-; SSE41-NEXT: pinsrd $3, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: retq
+; AVX1-LABEL: combine_vec_sdiv_dupe:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
+; AVX1-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_sdiv_dupe:
-; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX-NEXT: movl %ecx, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: vmovd %xmm0, %esi
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: movl %ecx, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX-NEXT: movl %ecx, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX2ORLATER-LABEL: combine_vec_sdiv_dupe:
+; AVX2ORLATER: # %bb.0:
+; AVX2ORLATER-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; AVX2ORLATER-NEXT: retq
+;
+; XOP-LABEL: combine_vec_sdiv_dupe:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
+; XOP-NEXT: retq
   %1 = sdiv <4 x i32> %x, %x
   ret <4 x i32> %1
 }
Index: test/CodeGen/X86/combine-srem.ll
===================================================================
--- test/CodeGen/X86/combine-srem.ll
+++ test/CodeGen/X86/combine-srem.ll
@@ -168,14 +168,11 @@
   ret <4 x i32> %1
 }
 
-; TODO fold (srem x, x) -> 0
+; fold (srem x, x) -> 0
 define i32 @combine_srem_dupe(i32 %x) {
 ; CHECK-LABEL: combine_srem_dupe:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: cltd
-; CHECK-NEXT: idivl %edi
-; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: retq
   %1 = srem i32 %x, %x
   ret i32 %1
@@ -184,53 +181,12 @@
 define <4 x i32> @combine_vec_srem_dupe(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_srem_dupe:
 ; SSE: # %bb.0:
-; SSE-NEXT: pextrd $1, %xmm0, %ecx
-; SSE-NEXT: movl %ecx, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: movl %edx, %ecx
-; SSE-NEXT: movd %xmm0, %esi
-; SSE-NEXT: movl %esi, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %esi
-; SSE-NEXT: movd %edx, %xmm1
-; SSE-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE-NEXT: pextrd $2, %xmm0, %ecx
-; SSE-NEXT: movl %ecx, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: pinsrd $2, %edx, %xmm1
-; SSE-NEXT: pextrd $3, %xmm0, %ecx
-; SSE-NEXT: movl %ecx, %eax
-; SSE-NEXT: cltd
-; SSE-NEXT: idivl %ecx
-; SSE-NEXT: pinsrd $3, %edx, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_srem_dupe:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %ecx
-; AVX-NEXT: movl %ecx, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: movl %edx, %ecx
-; AVX-NEXT: vmovd %xmm0, %esi
-; AVX-NEXT: movl %esi, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %esi
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %ecx
-; AVX-NEXT: movl %ecx, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %ecx
-; AVX-NEXT: movl %ecx, %eax
-; AVX-NEXT: cltd
-; AVX-NEXT: idivl %ecx
-; AVX-NEXT: vpinsrd $3, %edx, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = srem <4 x i32> %x, %x
   ret <4 x i32> %1
Index: test/CodeGen/X86/combine-udiv.ll
===================================================================
--- test/CodeGen/X86/combine-udiv.ll
+++ test/CodeGen/X86/combine-udiv.ll
@@ -213,108 +213,35 @@
   ret <4 x i32> %1
 }
 
-; TODO fold (udiv x, x) -> 1
+; fold (udiv x, x) -> 1
 define i32 @combine_udiv_dupe(i32 %x) {
 ; CHECK-LABEL: combine_udiv_dupe:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: divl %edi
+; CHECK-NEXT: movl $1, %eax
 ; CHECK-NEXT: retq
   %1 = udiv i32 %x, %x
   ret i32 %1
 }
 
 define <4 x i32> @combine_vec_udiv_dupe(<4 x i32> %x) {
-; SSE2-LABEL: combine_vec_udiv_dupe:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; SSE2-NEXT: movd %xmm1, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
-; SSE2-NEXT: movd %xmm2, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %eax
-; SSE2-NEXT: movd %eax, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %eax
-; SSE2-NEXT: movd %eax, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: xorl %edx, %edx
-; SSE2-NEXT: divl %eax
-; SSE2-NEXT: movd %eax, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: retq
+; SSE-LABEL: combine_vec_udiv_dupe:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; SSE-NEXT: retq
 ;
-; SSE41-LABEL: combine_vec_udiv_dupe:
-; SSE41: # %bb.0:
-; SSE41-NEXT: pextrd $1, %xmm0, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %eax
-; SSE41-NEXT: movl %eax, %ecx
-; SSE41-NEXT: movd %xmm0, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %eax
-; SSE41-NEXT: movd %eax, %xmm1
-; SSE41-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE41-NEXT: pextrd $2, %xmm0, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %eax
-; SSE41-NEXT: pinsrd $2, %eax, %xmm1
-; SSE41-NEXT: pextrd $3, %xmm0, %eax
-; SSE41-NEXT: xorl %edx, %edx
-; SSE41-NEXT: divl %eax
-; SSE41-NEXT: pinsrd $3, %eax, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: retq
+; AVX1-LABEL: combine_vec_udiv_dupe:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
+; AVX1-NEXT: retq
 ;
-; AVX-LABEL: combine_vec_udiv_dupe:
-; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: movl %eax, %ecx
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX2-LABEL: combine_vec_udiv_dupe:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; AVX2-NEXT: retq
 ;
 ; XOP-LABEL: combine_vec_udiv_dupe:
 ; XOP: # %bb.0:
-; XOP-NEXT: vpextrd $1, %xmm0, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %eax
-; XOP-NEXT: movl %eax, %ecx
-; XOP-NEXT: vmovd %xmm0, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %eax
-; XOP-NEXT: vmovd %eax, %xmm1
-; XOP-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; XOP-NEXT: vpextrd $2, %xmm0, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %eax
-; XOP-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
-; XOP-NEXT: vpextrd $3, %xmm0, %eax
-; XOP-NEXT: xorl %edx, %edx
-; XOP-NEXT: divl %eax
-; XOP-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
+; XOP-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
 ; XOP-NEXT: retq
   %1 = udiv <4 x i32> %x, %x
   ret <4 x i32> %1
Index: test/CodeGen/X86/combine-urem.ll
===================================================================
--- test/CodeGen/X86/combine-urem.ll
+++ test/CodeGen/X86/combine-urem.ll
@@ -157,14 +157,11 @@
   ret <4 x i32> %1
 }
 
-; TODO fold (urem x, x) -> 0
+; fold (urem x, x) -> 0
 define i32 @combine_urem_dupe(i32 %x) {
 ; CHECK-LABEL: combine_urem_dupe:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: divl %edi
-; CHECK-NEXT: movl %edx, %eax
+; CHECK-NEXT: xorl %eax, %eax
 ; CHECK-NEXT: retq
   %1 = urem i32 %x, %x
   ret i32 %1
@@ -173,45 +170,12 @@
 define <4 x i32> @combine_vec_urem_dupe(<4 x i32> %x) {
 ; SSE-LABEL: combine_vec_urem_dupe:
 ; SSE: # %bb.0:
-; SSE-NEXT: pextrd $1, %xmm0, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %eax
-; SSE-NEXT: movl %edx, %ecx
-; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %eax
-; SSE-NEXT: movd %edx, %xmm1
-; SSE-NEXT: pinsrd $1, %ecx, %xmm1
-; SSE-NEXT: pextrd $2, %xmm0, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %eax
-; SSE-NEXT: pinsrd $2, %edx, %xmm1
-; SSE-NEXT: pextrd $3, %xmm0, %eax
-; SSE-NEXT: xorl %edx, %edx
-; SSE-NEXT: divl %eax
-; SSE-NEXT: pinsrd $3, %edx, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_vec_urem_dupe:
 ; AVX: # %bb.0:
-; AVX-NEXT: vpextrd $1, %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: movl %edx, %ecx
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: vmovd %edx, %xmm1
-; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $2, %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1
-; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: xorl %edx, %edx
-; AVX-NEXT: divl %eax
-; AVX-NEXT: vpinsrd $3, %edx, %xmm1, %xmm0
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; AVX-NEXT: retq
   %1 = urem <4 x i32> %x, %x
   ret <4 x i32> %1
Index: test/CodeGen/X86/pr38539.ll
===================================================================
--- test/CodeGen/X86/pr38539.ll
+++ test/CodeGen/X86/pr38539.ll
@@ -6,67 +6,57 @@
 define void @f() {
 ; X64-LABEL: f:
 ; X64: # %bb.0: # %BB
-; X64-NEXT: pushq %rbp
-; X64-NEXT: .cfi_def_cfa_offset 16
 ; X64-NEXT: pushq %r14
-; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: .cfi_def_cfa_offset 16
 ; X64-NEXT: pushq %rbx
-; X64-NEXT: .cfi_def_cfa_offset 32
-; X64-NEXT: subq $16, %rsp
+; X64-NEXT: .cfi_def_cfa_offset 24
+; X64-NEXT: subq $24, %rsp
 ; X64-NEXT: .cfi_def_cfa_offset 48
-; X64-NEXT: .cfi_offset %rbx, -32
-; X64-NEXT: .cfi_offset %r14, -24
-; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: .cfi_offset %rbx, -24
+; X64-NEXT: .cfi_offset %r14, -16
 ; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
 ; X64-NEXT: movq %rbx, %rcx
 ; X64-NEXT: shlq $62, %rcx
 ; X64-NEXT: sarq $62, %rcx
-; X64-NEXT: movq (%rsp), %r14
-; X64-NEXT: movb (%rax), %bpl
+; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14
 ; X64-NEXT: xorl %edi, %edi
 ; X64-NEXT: xorl %esi, %esi
 ; X64-NEXT: movq %r14, %rdx
 ; X64-NEXT: callq __modti3
 ; X64-NEXT: andl $3, %edx
+; X64-NEXT: testb %al, %al
+; X64-NEXT: setne (%rax)
 ; X64-NEXT: cmpq %rax, %r14
 ; X64-NEXT: sbbq %rdx, %rbx
-; X64-NEXT: setb %sil
-; X64-NEXT: setae %bl
+; X64-NEXT: setae %dl
+; X64-NEXT: sbbb %cl, %cl
 ; X64-NEXT: testb %al, %al
-; X64-NEXT: setne %dl
-; X64-NEXT: setne (%rax)
-; X64-NEXT: movzbl %bpl, %eax
-; X64-NEXT: xorl %ecx, %ecx
-; X64-NEXT: subb %sil, %cl
-; X64-NEXT: # kill: def $eax killed $eax def $ax
-; X64-NEXT: divb %al
-; X64-NEXT: negb %bl
-; X64-NEXT: cmpb %al, %al
-; X64-NEXT: setle %al
+; X64-NEXT: setne %bl
+; X64-NEXT: negb %dl
+; X64-NEXT: cmpb $2, %al
+; X64-NEXT: setl %al
 ; X64-NEXT: negb %al
 ; X64-NEXT: cbtw
-; X64-NEXT: idivb %bl
+; X64-NEXT: idivb %dl
 ; X64-NEXT: movsbl %ah, %eax
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: andl $1, %eax
 ; X64-NEXT: shlq $4, %rax
 ; X64-NEXT: negq %rax
-; X64-NEXT: negb %dl
-; X64-NEXT: leaq -16(%rsp,%rax), %rax
+; X64-NEXT: negb %bl
+; X64-NEXT: leaq -8(%rsp,%rax), %rax
 ; X64-NEXT: movq %rax, (%rax)
 ; X64-NEXT: movl %ecx, %eax
 ; X64-NEXT: cbtw
-; X64-NEXT: idivb %dl
+; X64-NEXT: idivb %bl
 ; X64-NEXT: movsbl %ah, %eax
 ; X64-NEXT: andb $1, %al
 ; X64-NEXT: movb %al, (%rax)
-; X64-NEXT: addq $16, %rsp
-; X64-NEXT: .cfi_def_cfa_offset 32
-; X64-NEXT: popq %rbx
+; X64-NEXT: addq $24, %rsp
 ; X64-NEXT: .cfi_def_cfa_offset 24
-; X64-NEXT: popq %r14
+; X64-NEXT: popq %rbx
 ; X64-NEXT: .cfi_def_cfa_offset 16
-; X64-NEXT: popq %rbp
+; X64-NEXT: popq %r14
 ; X64-NEXT: .cfi_def_cfa_offset 8
 ; X64-NEXT: retq
 ;
@@ -81,7 +71,7 @@
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $48, %esp
+; X86-NEXT: subl $40, %esp
 ; X86-NEXT: .cfi_offset %esi, -20
 ; X86-NEXT: .cfi_offset %edi, -16
 ; X86-NEXT: .cfi_offset %ebx, -12
@@ -91,10 +81,8 @@
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: sarl $30, %ecx
 ; X86-NEXT: sarl $31, %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl (%esp), %edi
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-NEXT: movb (%eax), %dl
-; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT: leal {{[0-9]+}}(%esp), %edx
 ; X86-NEXT: pushl %eax
 ; X86-NEXT: pushl %ecx
@@ -119,14 +107,10 @@
 ; X86-NEXT: testb %al, %al
 ; X86-NEXT: setne %ch
 ; X86-NEXT: setne (%eax)
-; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dh # 1-byte Reload
-; X86-NEXT: movzbl %dh, %eax
-; X86-NEXT: # kill: def $eax killed $eax def $ax
-; X86-NEXT: divb %dh
 ; X86-NEXT: negb %ch
 ; X86-NEXT: negb %dl
-; X86-NEXT: cmpb %al, %al
-; X86-NEXT: setle %al
+; X86-NEXT: cmpb $2, %al
+; X86-NEXT: setl %al
 ; X86-NEXT: negb %al
 ; X86-NEXT: cbtw
 ; X86-NEXT: idivb %dl
@@ -135,7 +119,7 @@
 ; X86-NEXT: andl $1, %eax
 ; X86-NEXT: negl %eax
 ; X86-NEXT: leal (%eax,%eax,2), %eax
-; X86-NEXT: leal -4(%esp,%eax,4), %eax
+; X86-NEXT: leal -12(%esp,%eax,4), %eax
 ; X86-NEXT: movl %eax, (%eax)
 ; X86-NEXT: movl %ecx, %eax
 ; X86-NEXT: cbtw
@@ -178,46 +162,41 @@
 ; X64-LABEL: g:
 ; X64: # %bb.0: # %BB
 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
-; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi
-; X64-NEXT: shlq $32, %rsi
-; X64-NEXT: orq %rax, %rsi
-; X64-NEXT: movq %rsi, %rdi
-; X64-NEXT: shlq $30, %rdi
-; X64-NEXT: sarq $30, %rdi
-; X64-NEXT: movb (%rax), %al
-; X64-NEXT: movzbl %al, %eax
-; X64-NEXT: # kill: def $eax killed $eax def $ax
-; X64-NEXT: divb %al
-; X64-NEXT: movl %eax, %r8d
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT: shlq $32, %rcx
+; X64-NEXT: orq %rax, %rcx
+; X64-NEXT: movq %rcx, %rsi
+; X64-NEXT: shlq $30, %rsi
+; X64-NEXT: sarq $30, %rsi
 ; X64-NEXT: xorl %eax, %eax
 ; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: idivq %rdi
+; X64-NEXT: idivq %rsi
 ; X64-NEXT: movabsq $17179869183, %rax # imm = 0x3FFFFFFFF
 ; X64-NEXT: andq %rdx, %rax
 ; X64-NEXT: testb %al, %al
-; X64-NEXT: setne %dil
+; X64-NEXT: setne %sil
 ; X64-NEXT: setne (%rax)
-; X64-NEXT: cmpq %rsi, %rax
-; X64-NEXT: seta %dl
-; X64-NEXT: setbe %cl
-; X64-NEXT: negb %cl
-; X64-NEXT: cmpb %r8b, %al
-; X64-NEXT: setle %al
+; X64-NEXT: cmpq %rcx, %rax
+; X64-NEXT: seta %cl
+; X64-NEXT: setbe %dl
+; X64-NEXT: negb %dl
+; X64-NEXT: cmpb $2, %al
+; X64-NEXT: setl %al
 ; X64-NEXT: negb %al
 ; X64-NEXT: cbtw
-; X64-NEXT: idivb %cl
+; X64-NEXT: idivb %dl
 ; X64-NEXT: movsbl %ah, %eax
 ; X64-NEXT: movzbl %al, %eax
 ; X64-NEXT: andl $1, %eax
 ; X64-NEXT: shlq $3, %rax
 ; X64-NEXT: negq %rax
-; X64-NEXT: negb %dil
-; X64-NEXT: negb %dl
+; X64-NEXT: negb %sil
+; X64-NEXT: negb %cl
 ; X64-NEXT: leaq -16(%rsp,%rax), %rax
 ; X64-NEXT: movq %rax, (%rax)
-; X64-NEXT: movl %edx, %eax
+; X64-NEXT: movl %ecx, %eax
 ; X64-NEXT: cbtw
-; X64-NEXT: idivb %dil
+; X64-NEXT: idivb %sil
 ; X64-NEXT: movsbl %ah, %eax
 ; X64-NEXT: andb $1, %al
 ; X64-NEXT: movb %al, (%rax)
@@ -230,20 +209,17 @@
 ; X86-NEXT: .cfi_offset %ebp, -8
 ; X86-NEXT: movl %esp, %ebp
 ; X86-NEXT: .cfi_def_cfa_register %ebp
-; X86-NEXT: pushl %ebx
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl %esi
 ; X86-NEXT: andl $-8, %esp
-; X86-NEXT: subl $16, %esp
-; X86-NEXT: .cfi_offset %esi, -20
-; X86-NEXT: .cfi_offset %edi, -16
-; X86-NEXT: .cfi_offset %ebx, -12
+; X86-NEXT: subl $8, %esp
+; X86-NEXT: .cfi_offset %esi, -16
+; X86-NEXT: .cfi_offset %edi, -12
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT: movl %esi, %eax
 ; X86-NEXT: shll $30, %eax
 ; X86-NEXT: sarl $30, %eax
 ; X86-NEXT: movl (%esp), %edi
-; X86-NEXT: movb (%eax), %bl
 ; X86-NEXT: pushl %eax
 ; X86-NEXT: pushl %edi
 ; X86-NEXT: pushl $0
@@ -251,42 +227,37 @@
 ; X86-NEXT: calll __moddi3
 ; X86-NEXT: addl $16, %esp
 ; X86-NEXT: andl $3, %edx
+; X86-NEXT: testb %al, %al
+; X86-NEXT: setne (%eax)
 ; X86-NEXT: cmpl %eax, %edi
 ; X86-NEXT: sbbl %edx, %esi
-; X86-NEXT: setb %dl
-; X86-NEXT: setae %dh
+; X86-NEXT: setae %dl
+; X86-NEXT: sbbb %cl, %cl
 ; X86-NEXT: testb %al, %al
-; X86-NEXT: setne %bh
-; X86-NEXT: setne (%eax)
-; X86-NEXT: movzbl %bl, %eax
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: subb %dl, %cl
-; X86-NEXT: # kill: def $eax killed $eax def $ax
-; X86-NEXT: divb %bl
-; X86-NEXT: negb %dh
-; X86-NEXT: cmpb %al, %al
-; X86-NEXT: setle %al
+; X86-NEXT: setne %ch
+; X86-NEXT: negb %dl
+; X86-NEXT: cmpb $2, %al
+; X86-NEXT: setl %al
 ; X86-NEXT: negb %al
 ; X86-NEXT: cbtw
-; X86-NEXT: idivb %dh
+; X86-NEXT: idivb %dl
 ; X86-NEXT: movsbl %ah, %eax
 ; X86-NEXT: movzbl %al, %eax
 ; X86-NEXT: andl $1, %eax
 ; X86-NEXT: shll $3, %eax
 ; X86-NEXT: negl %eax
-; X86-NEXT: negb %bh
+; X86-NEXT: negb %ch
 ; X86-NEXT: leal -8(%esp,%eax), %eax
 ; X86-NEXT: movl %eax, (%eax)
 ; X86-NEXT: movl %ecx, %eax
 ; X86-NEXT: cbtw
-; X86-NEXT: idivb %bh
+; X86-NEXT: idivb %ch
 ; X86-NEXT: movsbl %ah, %eax
 ; X86-NEXT: andb $1, %al
 ; X86-NEXT: movb %al, (%eax)
-; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: leal -8(%ebp), %esp
 ; X86-NEXT: popl %esi
 ; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
 ; X86-NEXT: popl %ebp
 ; X86-NEXT: .cfi_def_cfa %esp, 4
 ; X86-NEXT: retl