Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3090,20 +3090,17 @@
   if (N0.isUndef())
     return DAG.getConstant(0, DL, VT);

-  // TODO: 0 / X -> 0
-  // TODO: 0 % X -> 0
+  // 0 / X -> 0
+  // 0 % X -> 0
+  ConstantSDNode *N0C = isConstOrConstSplat(N0);
+  if (N0C && N0C->isNullValue())
+    return N0;

   // X / X -> 1
   // X % X -> 0
   if (N0 == N1)
     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);

-  // TODO: X / 1 -> X
-  // TODO: X % 1 -> 0
-  // If this is a boolean op (single-bit element type), we can't have
-  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
-  // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
-
   return SDValue();
 }
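Note (illustrative example, not part of the diff): the added check returns N0 whenever the numerator is a constant zero or a zero splat, so an sdiv/udiv/srem/urem with a zero numerator now folds away before any divide sequence is built. A minimal scalar sketch, with a hypothetical function name, mirroring the combine_sdiv_zero test updated below:

  define i32 @sdiv_zero_example(i32 %x) {
    ; The numerator is the constant 0, so the node folds to 0 and no divide
    ; is emitted; with this patch llc emits just "xorl %eax, %eax" on x86.
    %r = sdiv i32 0, %x
    ret i32 %r
  }

The ARM and X86 test updates that follow show the same fold removing division sequences (and, in the ARM debug-info tests, keeping the extends that feed a now-non-constant divide).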
Index: test/CodeGen/ARM/fold-sext-sextload.ll
===================================================================
--- test/CodeGen/ARM/fold-sext-sextload.ll
+++ test/CodeGen/ARM/fold-sext-sextload.ll
@@ -1,15 +1,14 @@
 ; RUN: llc -mtriple armv7 %s -stop-before=livedebugvalues -o - | FileCheck %s
-define <4 x i8> @i(<4 x i8>*) !dbg !8 {
-  %2 = load <4 x i8>, <4 x i8>* %0, align 4, !dbg !14
+define <4 x i8> @i(<4 x i8>*, <4 x i8>) !dbg !8 {
+  %3 = load <4 x i8>, <4 x i8>* %0, align 4, !dbg !14
   ; CHECK: $[[reg:.*]] = VLD1LNd32 {{.*}} debug-location !14 :: (load 4 from %ir.0)
-  ; CHECK-NEXT: VMOVLsv8i16 {{.*}} $[[reg]], {{.*}} debug-location !14
-  ; CHECK-NEXT: VMOVLsv4i32 {{.*}} $[[reg]], {{.*}} debug-location !14
-
-  %3 = sdiv <4 x i8> zeroinitializer, %2, !dbg !15
-  call void @llvm.dbg.value(metadata <4 x i8> %2, metadata !11, metadata !DIExpression()), !dbg !14
-  call void @llvm.dbg.value(metadata <4 x i8> %3, metadata !13, metadata !DIExpression()), !dbg !15
-  ret <4 x i8> %3, !dbg !16
+  ; CHECK: VMOVLsv8i16 {{.*}} $[[reg]], {{.*}} debug-location !14
+  ; CHECK: VMOVLsv4i32 {{.*}} $[[reg]], {{.*}} debug-location !14
+  %4 = sdiv <4 x i8> %1, %3, !dbg !15
+  call void @llvm.dbg.value(metadata <4 x i8> %3, metadata !11, metadata !DIExpression()), !dbg !14
+  call void @llvm.dbg.value(metadata <4 x i8> %4, metadata !13, metadata !DIExpression()), !dbg !15
+  ret <4 x i8> %4, !dbg !16
 }

 declare void @llvm.dbg.value(metadata, metadata, metadata)
Index: test/CodeGen/ARM/fold-zext-zextload.ll
===================================================================
--- test/CodeGen/ARM/fold-zext-zextload.ll
+++ test/CodeGen/ARM/fold-zext-zextload.ll
@@ -1,15 +1,14 @@
 ; RUN: llc -mtriple armv7 %s -stop-before=livedebugvalues -o - | FileCheck %s
-define <4 x i8> @i(<4 x i8>*) !dbg !8 {
-  %2 = load <4 x i8>, <4 x i8>* %0, align 4, !dbg !14
+define <4 x i8> @i(<4 x i8>*, <4 x i8>) !dbg !8 {
+  %3 = load <4 x i8>, <4 x i8>* %0, align 4, !dbg !14
   ; CHECK: $[[reg:.*]] = VLD1LNd32 {{.*}} debug-location !14 :: (load 4 from %ir.0)
-  ; CHECK-NEXT: VMOVLuv8i16 {{.*}} $[[reg]], {{.*}} debug-location !14
-  ; CHECK-NEXT: VMOVLuv4i32 {{.*}} $[[reg]], {{.*}} debug-location !14
-
-  %3 = udiv <4 x i8> zeroinitializer, %2, !dbg !15
-  call void @llvm.dbg.value(metadata <4 x i8> %2, metadata !11, metadata !DIExpression()), !dbg !14
-  call void @llvm.dbg.value(metadata <4 x i8> %3, metadata !13, metadata !DIExpression()), !dbg !15
-  ret <4 x i8> %3, !dbg !16
+  ; CHECK: VMOVLuv8i16 {{.*}} $[[reg]], {{.*}} debug-location !14
+  ; CHECK: VMOVLuv4i32 {{.*}} $[[reg]], {{.*}} debug-location !14
+  %4 = udiv <4 x i8> %1, %3, !dbg !15
+  call void @llvm.dbg.value(metadata <4 x i8> %3, metadata !11, metadata !DIExpression()), !dbg !14
+  call void @llvm.dbg.value(metadata <4 x i8> %4, metadata !13, metadata !DIExpression()), !dbg !15
+  ret <4 x i8> %4, !dbg !16
 }

 declare void @llvm.dbg.value(metadata, metadata, metadata)
Index: test/CodeGen/ARM/vector-extend-narrow.ll
===================================================================
--- test/CodeGen/ARM/vector-extend-narrow.ll
+++ test/CodeGen/ARM/vector-extend-narrow.ll
@@ -1,73 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s

+define float @f(<4 x i16>* nocapture %in) {
 ; CHECK-LABEL: f:
-define float @f(<4 x i16>* nocapture %in) {
-  ; CHECK: vld1
-  ; CHECK: vmovl.u16
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.16 {d16}, [r0:64]
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vcvt.f32.u32 q0, q8
+; CHECK-NEXT: vadd.f32 s4, s0, s1
+; CHECK-NEXT: vadd.f32 s0, s4, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
   %1 = load <4 x i16>, <4 x i16>* %in
-  ; CHECK: vcvt.f32.u32
   %2 = uitofp <4 x i16> %1 to <4 x float>
   %3 = extractelement <4 x float> %2, i32 0
   %4 = extractelement <4 x float> %2, i32 1
   %5 = extractelement <4 x float> %2, i32 2
-  ; CHECK: vadd.f32
   %6 = fadd float %3, %4
   %7 = fadd float %6, %5
   ret float %7
 }

-; CHECK-LABEL: g:
 define float @g(<4 x i8>* nocapture %in) {
 ; Note: vld1 here is reasonably important. Mixing VFP and NEON
 ; instructions is bad on some cores
-  ; CHECK: vld1
-  ; CHECK: vmovl.u8
-  ; CHECK: vmovl.u16
+; CHECK-LABEL: g:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT: vmovl.u8 q8, d16
+; CHECK-NEXT: vmovl.u16 q8, d16
+; CHECK-NEXT: vcvt.f32.u32 q0, q8
+; CHECK-NEXT: vadd.f32 s4, s0, s1
+; CHECK-NEXT: vadd.f32 s0, s4, s2
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
   %1 = load <4 x i8>, <4 x i8>* %in
-  ; CHECK: vcvt.f32.u32
   %2 = uitofp <4 x i8> %1 to <4 x float>
   %3 = extractelement <4 x float> %2, i32 0
   %4 = extractelement <4 x float> %2, i32 1
   %5 = extractelement <4 x float> %2, i32 2
-  ; CHECK: vadd.f32
   %6 = fadd float %3, %4
   %7 = fadd float %6, %5
   ret float %7
 }

+define <4 x i8> @h(<4 x float> %v) {
 ; CHECK-LABEL: h:
-define <4 x i8> @h(<4 x float> %v) {
-  ; CHECK: vcvt.{{[us]}}32.f32
-  ; CHECK: vmovn.i32
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmov d17, r2, r3
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vcvt.s32.f32 q8, q8
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: bx lr
   %1 = fptoui <4 x float> %v to <4 x i8>
   ret <4 x i8> %1
 }

-; CHECK-LABEL: i:
 define <4 x i8> @i(<4 x i8>* %x) {
 ; Note: vld1 here is reasonably important.
Mixing VFP and NEON ; instructions is bad on some cores - ; CHECK: vld1 - ; CHECK: vmovl.s8 - ; CHECK: vmovl.s16 - ; CHECK: vrecpe - ; CHECK: vrecps - ; CHECK: vmul - ; CHECK: vmovn +; CHECK-LABEL: i: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 d16, #0x0 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: bx lr %1 = load <4 x i8>, <4 x i8>* %x, align 4 %2 = sdiv <4 x i8> zeroinitializer, %1 ret <4 x i8> %2 } + +define <4 x i32> @j(<4 x i8>* %in) nounwind { ; CHECK-LABEL: j: -define <4 x i32> @j(<4 x i8>* %in) nounwind { - ; CHECK: vld1 - ; CHECK: vmovl.u8 - ; CHECK: vmovl.u16 - ; CHECK-NOT: vand +; CHECK: @ %bb.0: +; CHECK-NEXT: vld1.32 {d16[0]}, [r0:32] +; CHECK-NEXT: vmovl.u8 q8, d16 +; CHECK-NEXT: vmovl.u16 q8, d16 +; CHECK-NEXT: vmov r0, r1, d16 +; CHECK-NEXT: vmov r2, r3, d17 +; CHECK-NEXT: bx lr %1 = load <4 x i8>, <4 x i8>* %in, align 4 %2 = zext <4 x i8> %1 to <4 x i32> ret <4 x i32> %2 Index: test/CodeGen/X86/combine-sdiv.ll =================================================================== --- test/CodeGen/X86/combine-sdiv.ll +++ test/CodeGen/X86/combine-sdiv.ll @@ -107,99 +107,25 @@ ret <4 x i32> %1 } -; TODO fold (sdiv 0, x) -> 0 +; fold (sdiv 0, x) -> 0 define i32 @combine_sdiv_zero(i32 %x) { ; CHECK-LABEL: combine_sdiv_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: idivl %edi ; CHECK-NEXT: retq %1 = sdiv i32 0, %x ret i32 %1 } define <4 x i32> @combine_vec_sdiv_zero(<4 x i32> %x) { -; SSE2-LABEL: combine_vec_sdiv_zero: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: idivl %ecx -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: combine_vec_sdiv_zero: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: retq ; -; SSE41-LABEL: combine_vec_sdiv_zero: -; SSE41: # %bb.0: -; SSE41-NEXT: pextrd $1, %xmm0, %ecx -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: movd %xmm0, %esi -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: idivl %esi -; SSE41-NEXT: movd %eax, %xmm1 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm1 -; SSE41-NEXT: pextrd $2, %xmm0, %ecx -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: pinsrd $2, %eax, %xmm1 -; SSE41-NEXT: pextrd $3, %xmm0, %ecx -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: idivl %ecx -; SSE41-NEXT: pinsrd $3, %eax, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: retq -; ; AVX-LABEL: combine_vec_sdiv_zero: ; AVX: # %bb.0: -; AVX-NEXT: vpextrd $1, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl 
%edx, %edx -; AVX-NEXT: idivl %ecx -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: vmovd %xmm0, %esi -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: idivl %esi -; AVX-NEXT: vmovd %eax, %xmm1 -; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $2, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $3, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = sdiv <4 x i32> zeroinitializer, %x ret <4 x i32> %1 Index: test/CodeGen/X86/combine-srem.ll =================================================================== --- test/CodeGen/X86/combine-srem.ll +++ test/CodeGen/X86/combine-srem.ll @@ -100,14 +100,11 @@ ret <4 x i32> %1 } -; TODO fold (srem 0, x) -> 0 +; fold (srem 0, x) -> 0 define i32 @combine_srem_zero(i32 %x) { ; CHECK-LABEL: combine_srem_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: idivl %edi -; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %1 = srem i32 0, %x ret i32 %1 @@ -116,53 +113,12 @@ define <4 x i32> @combine_vec_srem_zero(<4 x i32> %x) { ; SSE-LABEL: combine_vec_srem_zero: ; SSE: # %bb.0: -; SSE-NEXT: pextrd $1, %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: idivl %ecx -; SSE-NEXT: movl %edx, %ecx -; SSE-NEXT: movd %xmm0, %esi -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: idivl %esi -; SSE-NEXT: movd %edx, %xmm1 -; SSE-NEXT: pinsrd $1, %ecx, %xmm1 -; SSE-NEXT: pextrd $2, %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: idivl %ecx -; SSE-NEXT: pinsrd $2, %edx, %xmm1 -; SSE-NEXT: pextrd $3, %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: idivl %ecx -; SSE-NEXT: pinsrd $3, %edx, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_srem_zero: ; AVX: # %bb.0: -; AVX-NEXT: vpextrd $1, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: idivl %ecx -; AVX-NEXT: movl %edx, %ecx -; AVX-NEXT: vmovd %xmm0, %esi -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: idivl %esi -; AVX-NEXT: vmovd %edx, %xmm1 -; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $2, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $3, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: idivl %ecx -; AVX-NEXT: vpinsrd $3, %edx, %xmm1, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = srem <4 x i32> zeroinitializer, %x ret <4 x i32> %1 Index: test/CodeGen/X86/combine-udiv.ll =================================================================== --- test/CodeGen/X86/combine-udiv.ll +++ test/CodeGen/X86/combine-udiv.ll @@ -90,124 +90,30 @@ ret <4 x i32> %1 } -; TODO fold (udiv 0, x) -> 0 +; fold (udiv 0, x) -> 0 define i32 @combine_udiv_zero(i32 %x) { ; CHECK-LABEL: combine_udiv_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: divl %edi ; CHECK-NEXT: retq %1 = udiv i32 0, %x ret i32 %1 } define <4 x i32> @combine_vec_udiv_zero(<4 x i32> %x) { -; SSE2-LABEL: combine_vec_udiv_zero: -; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} 
xmm1 = xmm0[3,1,2,3] -; SSE2-NEXT: movd %xmm1, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm2, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm2 -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; SSE2-NEXT: movd %xmm0, %ecx -; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: xorl %edx, %edx -; SSE2-NEXT: divl %ecx -; SSE2-NEXT: movd %eax, %xmm0 -; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: combine_vec_udiv_zero: +; SSE: # %bb.0: +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: retq ; -; SSE41-LABEL: combine_vec_udiv_zero: -; SSE41: # %bb.0: -; SSE41-NEXT: pextrd $1, %xmm0, %ecx -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: movl %eax, %ecx -; SSE41-NEXT: movd %xmm0, %esi -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %esi -; SSE41-NEXT: movd %eax, %xmm1 -; SSE41-NEXT: pinsrd $1, %ecx, %xmm1 -; SSE41-NEXT: pextrd $2, %xmm0, %ecx -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: pinsrd $2, %eax, %xmm1 -; SSE41-NEXT: pextrd $3, %xmm0, %ecx -; SSE41-NEXT: xorl %eax, %eax -; SSE41-NEXT: xorl %edx, %edx -; SSE41-NEXT: divl %ecx -; SSE41-NEXT: pinsrd $3, %eax, %xmm1 -; SSE41-NEXT: movdqa %xmm1, %xmm0 -; SSE41-NEXT: retq -; ; AVX-LABEL: combine_vec_udiv_zero: ; AVX: # %bb.0: -; AVX-NEXT: vpextrd $1, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: movl %eax, %ecx -; AVX-NEXT: vmovd %xmm0, %esi -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %esi -; AVX-NEXT: vmovd %eax, %xmm1 -; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $2, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $3, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; XOP-LABEL: combine_vec_udiv_zero: ; XOP: # %bb.0: -; XOP-NEXT: vpextrd $1, %xmm0, %ecx -; XOP-NEXT: xorl %eax, %eax -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: movl %eax, %ecx -; XOP-NEXT: vmovd %xmm0, %esi -; XOP-NEXT: xorl %eax, %eax -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %esi -; XOP-NEXT: vmovd %eax, %xmm1 -; XOP-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 -; XOP-NEXT: vpextrd $2, %xmm0, %ecx -; XOP-NEXT: xorl %eax, %eax -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1 -; XOP-NEXT: vpextrd $3, %xmm0, %ecx -; XOP-NEXT: xorl %eax, %eax -; XOP-NEXT: xorl %edx, %edx -; XOP-NEXT: divl %ecx -; XOP-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0 +; XOP-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; XOP-NEXT: retq %1 = udiv <4 x i32> zeroinitializer, %x ret <4 x i32> %1 Index: test/CodeGen/X86/combine-urem.ll =================================================================== 
--- test/CodeGen/X86/combine-urem.ll +++ test/CodeGen/X86/combine-urem.ll @@ -89,14 +89,11 @@ ret <4 x i32> %1 } -; TODO fold (urem 0, x) -> 0 +; fold (urem 0, x) -> 0 define i32 @combine_urem_zero(i32 %x) { ; CHECK-LABEL: combine_urem_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: divl %edi -; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: retq %1 = urem i32 0, %x ret i32 %1 @@ -105,53 +102,12 @@ define <4 x i32> @combine_vec_urem_zero(<4 x i32> %x) { ; SSE-LABEL: combine_vec_urem_zero: ; SSE: # %bb.0: -; SSE-NEXT: pextrd $1, %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: movl %edx, %ecx -; SSE-NEXT: movd %xmm0, %esi -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %esi -; SSE-NEXT: movd %edx, %xmm1 -; SSE-NEXT: pinsrd $1, %ecx, %xmm1 -; SSE-NEXT: pextrd $2, %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: pinsrd $2, %edx, %xmm1 -; SSE-NEXT: pextrd $3, %xmm0, %ecx -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: xorl %edx, %edx -; SSE-NEXT: divl %ecx -; SSE-NEXT: pinsrd $3, %edx, %xmm1 -; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_urem_zero: ; AVX: # %bb.0: -; AVX-NEXT: vpextrd $1, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: movl %edx, %ecx -; AVX-NEXT: vmovd %xmm0, %esi -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %esi -; AVX-NEXT: vmovd %edx, %xmm1 -; AVX-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $2, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 -; AVX-NEXT: vpextrd $3, %xmm0, %ecx -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: xorl %edx, %edx -; AVX-NEXT: divl %ecx -; AVX-NEXT: vpinsrd $3, %edx, %xmm1, %xmm0 +; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = urem <4 x i32> zeroinitializer, %x ret <4 x i32> %1 Index: test/CodeGen/X86/copy-eflags.ll =================================================================== --- test/CodeGen/X86/copy-eflags.ll +++ test/CodeGen/X86/copy-eflags.ll @@ -215,76 +215,49 @@ ; X32-NEXT: .cfi_offset %edi, -16 ; X32-NEXT: .cfi_offset %ebx, -12 ; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movb {{[0-9]+}}(%esp), %ch -; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: movb {{[0-9]+}}(%esp), %bl +; X32-NEXT: movb {{[0-9]+}}(%esp), %bh ; X32-NEXT: jmp .LBB3_1 ; X32-NEXT: .p2align 4, 0x90 -; X32-NEXT: .LBB3_5: # %bb1 +; X32-NEXT: .LBB3_3: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: idivl %ebp +; X32-NEXT: movb %bh, (%esi) +; X32-NEXT: movl (%edx), %edi ; X32-NEXT: .LBB3_1: # %bb1 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movsbl %cl, %eax -; X32-NEXT: movl %eax, %edx -; X32-NEXT: sarl $31, %edx -; X32-NEXT: cmpl %eax, %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: sbbl %edx, %eax -; X32-NEXT: setl %al -; X32-NEXT: setl %dl -; X32-NEXT: movzbl %dl, %ebp -; X32-NEXT: negl %ebp -; X32-NEXT: testb %al, %al -; X32-NEXT: jne .LBB3_3 +; X32-NEXT: movsbl %bh, %edi +; X32-NEXT: 
movl %edi, %ebp +; X32-NEXT: sarl $31, %ebp +; X32-NEXT: cmpl %edi, %ecx +; X32-NEXT: movl %eax, %edi +; X32-NEXT: sbbl %ebp, %edi +; X32-NEXT: jl .LBB3_3 ; X32-NEXT: # %bb.2: # %bb1 ; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: movb %ch, %cl -; X32-NEXT: .LBB3_3: # %bb1 -; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: movb %cl, (%ebx) -; X32-NEXT: movl (%edi), %edx -; X32-NEXT: testb %al, %al -; X32-NEXT: jne .LBB3_5 -; X32-NEXT: # %bb.4: # %bb1 -; X32-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X32-NEXT: movl %edx, %ebp -; X32-NEXT: jmp .LBB3_5 +; X32-NEXT: movb %bl, %bh +; X32-NEXT: jmp .LBB3_3 ; ; X64-LABEL: PR37100: ; X64: # %bb.0: # %bb -; X64-NEXT: movq %rdx, %r10 ; X64-NEXT: jmp .LBB3_1 ; X64-NEXT: .p2align 4, 0x90 -; X64-NEXT: .LBB3_5: # %bb1 +; X64-NEXT: .LBB3_3: # %bb1 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: idivl %esi +; X64-NEXT: movb %dil, (%r8) +; X64-NEXT: movl (%r9), %eax ; X64-NEXT: .LBB3_1: # %bb1 ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movsbq %dil, %rax -; X64-NEXT: xorl %esi, %esi -; X64-NEXT: cmpq %rax, %r10 -; X64-NEXT: setl %sil -; X64-NEXT: negl %esi -; X64-NEXT: cmpq %rax, %r10 +; X64-NEXT: cmpq %rax, %rdx ; X64-NEXT: jl .LBB3_3 ; X64-NEXT: # %bb.2: # %bb1 ; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 ; X64-NEXT: movl %ecx, %edi -; X64-NEXT: .LBB3_3: # %bb1 -; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X64-NEXT: movb %dil, (%r8) -; X64-NEXT: jl .LBB3_5 -; X64-NEXT: # %bb.4: # %bb1 -; X64-NEXT: # in Loop: Header=BB3_1 Depth=1 -; X64-NEXT: movl (%r9), %esi -; X64-NEXT: jmp .LBB3_5 +; X64-NEXT: jmp .LBB3_3 bb: br label %bb1 @@ -315,40 +288,27 @@ ; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: .cfi_offset %esi, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl (%eax), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: sarl $31, %ecx -; X32-NEXT: cmpl %eax, %eax -; X32-NEXT: sbbl %ecx, %eax -; X32-NEXT: setb %al -; X32-NEXT: sbbb %cl, %cl -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movb %cl, (%edx) -; X32-NEXT: movzbl %al, %eax -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: idivl %ecx -; X32-NEXT: movb %dl, (%esi) +; X32-NEXT: movl (%edx), %edx +; X32-NEXT: movl %edx, %esi +; X32-NEXT: sarl $31, %esi +; X32-NEXT: cmpl %edx, %eax +; X32-NEXT: sbbl %esi, %edx +; X32-NEXT: sbbb %dl, %dl +; X32-NEXT: movb %dl, (%ecx) +; X32-NEXT: movb $0, (%eax) ; X32-NEXT: popl %esi ; X32-NEXT: .cfi_def_cfa_offset 4 ; X32-NEXT: retl ; ; X64-LABEL: PR37431: ; X64: # %bb.0: # %entry -; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movslq (%rdi), %rax ; X64-NEXT: cmpq %rax, %rax -; X64-NEXT: sbbb %dl, %dl -; X64-NEXT: cmpq %rax, %rax -; X64-NEXT: movb %dl, (%rsi) -; X64-NEXT: sbbl %esi, %esi -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: idivl %esi -; X64-NEXT: movb %dl, (%rcx) +; X64-NEXT: sbbb %al, %al +; X64-NEXT: movb %al, (%rsi) +; X64-NEXT: movb $0, (%rdx) ; X64-NEXT: retq entry: %tmp = load i32, i32* %arg1 Index: test/CodeGen/X86/pr32282.ll =================================================================== --- test/CodeGen/X86/pr32282.ll +++ test/CodeGen/X86/pr32282.ll @@ -12,34 +12,10 @@ define void @foo() { ; X86-LABEL: foo: ; X86: # %bb.0: -; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: movl d, %eax -; X86-NEXT: notl %eax -; X86-NEXT: movl d+4, %ecx -; 
X86-NEXT: notl %ecx -; X86-NEXT: andl $701685459, %ecx # imm = 0x29D2DED3 -; X86-NEXT: andl $-564453154, %eax # imm = 0xDE5B20DE -; X86-NEXT: shrdl $21, %ecx, %eax -; X86-NEXT: shrl $21, %ecx -; X86-NEXT: andl $-2, %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: addl $7, %eax -; X86-NEXT: adcxl %edx, %ecx -; X86-NEXT: pushl %ecx -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl $0 -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: pushl $0 -; X86-NEXT: .cfi_adjust_cfa_offset 4 -; X86-NEXT: calll __divdi3 -; X86-NEXT: addl $16, %esp -; X86-NEXT: .cfi_adjust_cfa_offset -16 -; X86-NEXT: orl %eax, %edx -; X86-NEXT: setne {{[0-9]+}}(%esp) -; X86-NEXT: popl %eax +; X86-NEXT: subl $1, %esp +; X86-NEXT: .cfi_def_cfa_offset 5 +; X86-NEXT: movb $0, (%esp) +; X86-NEXT: addl $1, %esp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl ; @@ -47,25 +23,12 @@ ; X64: # %bb.0: ; X64-NEXT: movq {{.*}}(%rip), %rax ; X64-NEXT: movabsq $3013716102212485120, %rcx # imm = 0x29D2DED3DE400000 -; X64-NEXT: andnq %rcx, %rax, %rcx -; X64-NEXT: shrq $21, %rcx -; X64-NEXT: addq $7, %rcx -; X64-NEXT: movabsq $4393751543808, %rax # imm = 0x3FF00000000 -; X64-NEXT: testq %rax, %rcx -; X64-NEXT: je .LBB0_1 -; X64-NEXT: # %bb.2: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divq %rcx -; X64-NEXT: jmp .LBB0_3 -; X64-NEXT: .LBB0_1: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: xorl %edx, %edx -; X64-NEXT: divl %ecx -; X64-NEXT: # kill: def $eax killed $eax def $rax -; X64-NEXT: .LBB0_3: -; X64-NEXT: testq %rax, %rax -; X64-NEXT: setne -{{[0-9]+}}(%rsp) +; X64-NEXT: andnq %rcx, %rax, %rax +; X64-NEXT: shrq $21, %rax +; X64-NEXT: addq $7, %rax +; X64-NEXT: movabsq $4393751543808, %rcx # imm = 0x3FF00000000 +; X64-NEXT: testq %rcx, %rax +; X64-NEXT: movb $0, -{{[0-9]+}}(%rsp) ; X64-NEXT: retq %1 = alloca i8, align 1 %2 = load i64, i64* @d, align 8 Index: test/CodeGen/X86/pr38539.ll =================================================================== --- test/CodeGen/X86/pr38539.ll +++ test/CodeGen/X86/pr38539.ll @@ -6,68 +6,22 @@ define void @f() { ; X64-LABEL: f: ; X64: # %bb.0: # %BB -; X64-NEXT: pushq %rbp -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: pushq %r14 -; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: pushq %rbx -; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: subq $16, %rsp -; X64-NEXT: .cfi_def_cfa_offset 48 -; X64-NEXT: .cfi_offset %rbx, -32 -; X64-NEXT: .cfi_offset %r14, -24 -; X64-NEXT: .cfi_offset %rbp, -16 -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; X64-NEXT: movq %rbp, %rcx -; X64-NEXT: shlq $62, %rcx -; X64-NEXT: sarq $62, %rcx -; X64-NEXT: movq (%rsp), %rbx ; X64-NEXT: movb (%rax), %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: # kill: def $eax killed $eax def $ax ; X64-NEXT: divb (%rax) -; X64-NEXT: movl %eax, %r14d -; X64-NEXT: xorl %edi, %edi -; X64-NEXT: xorl %esi, %esi -; X64-NEXT: movq %rbx, %rdx -; X64-NEXT: callq __modti3 -; X64-NEXT: andl $3, %edx -; X64-NEXT: testb %al, %al -; X64-NEXT: setne (%rax) -; X64-NEXT: cmpq %rax, %rbx -; X64-NEXT: sbbq %rdx, %rbp -; X64-NEXT: setae %dl -; X64-NEXT: sbbb %cl, %cl -; X64-NEXT: testb %al, %al -; X64-NEXT: setne %bl -; X64-NEXT: negb %dl -; X64-NEXT: cmpb %r14b, %al +; X64-NEXT: cmpb %al, %al ; X64-NEXT: setle %al -; X64-NEXT: negb %al -; X64-NEXT: cbtw -; X64-NEXT: idivb %dl -; X64-NEXT: movsbl %ah, %eax +; X64-NEXT: addb %al, %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: andl $1, %eax ; X64-NEXT: shlq $4, %rax ; X64-NEXT: negq %rax -; X64-NEXT: negb %bl -; X64-NEXT: 
leaq -16(%rsp,%rax), %rax +; X64-NEXT: testb %al, %al +; X64-NEXT: setne (%rax) +; X64-NEXT: leaq -32(%rsp,%rax), %rax ; X64-NEXT: movq %rax, (%rax) -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: cbtw -; X64-NEXT: idivb %bl -; X64-NEXT: movsbl %ah, %eax -; X64-NEXT: andb $1, %al -; X64-NEXT: movb %al, (%rax) -; X64-NEXT: addq $16, %rsp -; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: popq %rbx -; X64-NEXT: .cfi_def_cfa_offset 24 -; X64-NEXT: popq %r14 -; X64-NEXT: .cfi_def_cfa_offset 16 -; X64-NEXT: popq %rbp -; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: movb $0, (%rax) ; X64-NEXT: retq ; ; X86-LABEL: f: @@ -77,75 +31,25 @@ ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $48, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: shll $30, %ecx -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: sarl $30, %edx -; X86-NEXT: sarl $31, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: subl $16, %esp ; X86-NEXT: movb (%eax), %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: # kill: def $eax killed $eax def $ax ; X86-NEXT: divb (%eax) -; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: leal {{[0-9]+}}(%esp), %eax -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %edx -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: pushl %eax -; X86-NEXT: calll __modti3 -; X86-NEXT: addl $32, %esp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $3, %eax -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sbbl %eax, %esi -; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: setae %dl -; X86-NEXT: sbbb %cl, %cl +; X86-NEXT: cmpb %al, %al +; X86-NEXT: setle %al +; X86-NEXT: addb %al, %al ; X86-NEXT: testb %al, %al -; X86-NEXT: setne %ch ; X86-NEXT: setne (%eax) -; X86-NEXT: negb %ch -; X86-NEXT: negb %dl -; X86-NEXT: cmpb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload -; X86-NEXT: setle %al -; X86-NEXT: negb %al -; X86-NEXT: cbtw -; X86-NEXT: idivb %dl -; X86-NEXT: movsbl %ah, %eax ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: negl %eax ; X86-NEXT: leal (%eax,%eax,2), %eax -; X86-NEXT: leal -4(%esp,%eax,4), %eax +; X86-NEXT: leal -12(%esp,%eax,4), %eax ; X86-NEXT: movl %eax, (%eax) -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cbtw -; X86-NEXT: idivb %ch -; X86-NEXT: movsbl %ah, %eax -; X86-NEXT: andb $1, %al -; X86-NEXT: movb %al, (%eax) -; X86-NEXT: leal -12(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: movb $0, (%eax) +; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl @@ -177,50 +81,22 @@ define void @g() { ; X64-LABEL: g: ; X64: # %bb.0: # %BB -; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %esi -; X64-NEXT: shlq $32, %rsi -; X64-NEXT: orq %rax, %rsi -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: shlq $30, %rdi -; X64-NEXT: sarq $30, %rdi ; X64-NEXT: movb (%rax), %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: # kill: def $eax killed $eax def $ax ; X64-NEXT: divb (%rax) -; X64-NEXT: movl %eax, %r8d -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: xorl %edx, 
%edx -; X64-NEXT: idivq %rdi -; X64-NEXT: movabsq $17179869183, %rax # imm = 0x3FFFFFFFF -; X64-NEXT: andq %rdx, %rax -; X64-NEXT: testb %al, %al -; X64-NEXT: setne %dil -; X64-NEXT: setne (%rax) -; X64-NEXT: cmpq %rsi, %rax -; X64-NEXT: seta %dl -; X64-NEXT: setbe %cl -; X64-NEXT: negb %cl -; X64-NEXT: cmpb %r8b, %al +; X64-NEXT: cmpb %al, %al ; X64-NEXT: setle %al -; X64-NEXT: negb %al -; X64-NEXT: cbtw -; X64-NEXT: idivb %cl -; X64-NEXT: movsbl %ah, %eax +; X64-NEXT: addb %al, %al ; X64-NEXT: movzbl %al, %eax ; X64-NEXT: andl $1, %eax ; X64-NEXT: shlq $3, %rax ; X64-NEXT: negq %rax -; X64-NEXT: negb %dil -; X64-NEXT: negb %dl +; X64-NEXT: testb %al, %al +; X64-NEXT: setne (%rax) ; X64-NEXT: leaq -16(%rsp,%rax), %rax ; X64-NEXT: movq %rax, (%rax) -; X64-NEXT: movl %edx, %eax -; X64-NEXT: cbtw -; X64-NEXT: idivb %dil -; X64-NEXT: movsbl %ah, %eax -; X64-NEXT: andb $1, %al -; X64-NEXT: movb %al, (%rax) +; X64-NEXT: movb $0, (%rax) ; X64-NEXT: retq ; ; X86-LABEL: g: @@ -230,63 +106,25 @@ ; X86-NEXT: .cfi_offset %ebp, -8 ; X86-NEXT: movl %esp, %ebp ; X86-NEXT: .cfi_def_cfa_register %ebp -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %esi, %ecx -; X86-NEXT: shll $30, %ecx -; X86-NEXT: sarl $30, %ecx -; X86-NEXT: movl (%esp), %edi +; X86-NEXT: subl $8, %esp ; X86-NEXT: movb (%eax), %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: # kill: def $eax killed $eax def $ax ; X86-NEXT: divb (%eax) -; X86-NEXT: movl %eax, %ebx -; X86-NEXT: pushl %ecx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl $0 -; X86-NEXT: pushl $0 -; X86-NEXT: calll __moddi3 -; X86-NEXT: addl $16, %esp -; X86-NEXT: andl $3, %edx -; X86-NEXT: testb %al, %al -; X86-NEXT: setne (%eax) -; X86-NEXT: cmpl %eax, %edi -; X86-NEXT: sbbl %edx, %esi -; X86-NEXT: setae %dl -; X86-NEXT: sbbb %cl, %cl -; X86-NEXT: testb %al, %al -; X86-NEXT: setne %ch -; X86-NEXT: negb %dl -; X86-NEXT: cmpb %bl, %al +; X86-NEXT: cmpb %al, %al ; X86-NEXT: setle %al -; X86-NEXT: negb %al -; X86-NEXT: cbtw -; X86-NEXT: idivb %dl -; X86-NEXT: movsbl %ah, %eax +; X86-NEXT: addb %al, %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: andl $1, %eax ; X86-NEXT: shll $3, %eax ; X86-NEXT: negl %eax -; X86-NEXT: negb %ch +; X86-NEXT: testb %al, %al +; X86-NEXT: setne (%eax) ; X86-NEXT: leal -8(%esp,%eax), %eax ; X86-NEXT: movl %eax, (%eax) -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cbtw -; X86-NEXT: idivb %ch -; X86-NEXT: movsbl %ah, %eax -; X86-NEXT: andb $1, %al -; X86-NEXT: movb %al, (%eax) -; X86-NEXT: leal -12(%ebp), %esp -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx +; X86-NEXT: movb $0, (%eax) +; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl Index: test/CodeGen/X86/shrink_vmul.ll =================================================================== --- test/CodeGen/X86/shrink_vmul.ll +++ test/CodeGen/X86/shrink_vmul.ll @@ -2199,383 +2199,33 @@ define void @PR34947() { ; X86-SSE-LABEL: PR34947: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movdqa (%eax), %xmm0 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] -; X86-SSE-NEXT: movd %xmm1, %ecx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm1 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; X86-SSE-NEXT: movd %xmm2, %ecx -; X86-SSE-NEXT: 
xorl %eax, %eax -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm2 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X86-SSE-NEXT: movd %xmm0, %ecx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm1 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; X86-SSE-NEXT: movd %xmm0, %ecx -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl %ecx -; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; X86-SSE-NEXT: xorl %eax, %eax -; X86-SSE-NEXT: xorl %edx, %edx -; X86-SSE-NEXT: divl (%eax) -; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: pmaddwd {{\.LCPI.*}}, %xmm1 -; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007 -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: pmuludq %xmm0, %xmm2 -; X86-SSE-NEXT: movd %xmm2, (%eax) -; X86-SSE-NEXT: movdqa %xmm1, (%eax) +; X86-SSE-NEXT: xorps %xmm0, %xmm0 +; X86-SSE-NEXT: movaps %xmm0, (%eax) +; X86-SSE-NEXT: movl $0, (%eax) ; X86-SSE-NEXT: retl ; -; X86-AVX1-LABEL: PR34947: -; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: pushl %ebp -; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX1-NEXT: pushl %ebx -; X86-AVX1-NEXT: .cfi_def_cfa_offset 12 -; X86-AVX1-NEXT: pushl %edi -; X86-AVX1-NEXT: .cfi_def_cfa_offset 16 -; X86-AVX1-NEXT: pushl %esi -; X86-AVX1-NEXT: .cfi_def_cfa_offset 20 -; X86-AVX1-NEXT: subl $16, %esp -; X86-AVX1-NEXT: .cfi_def_cfa_offset 36 -; X86-AVX1-NEXT: .cfi_offset %esi, -20 -; X86-AVX1-NEXT: .cfi_offset %edi, -16 -; X86-AVX1-NEXT: .cfi_offset %ebx, -12 -; X86-AVX1-NEXT: .cfi_offset %ebp, -8 -; X86-AVX1-NEXT: vmovdqa (%eax), %ymm0 -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: divl (%eax) -; X86-AVX1-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-AVX1-NEXT: vpextrd $3, %xmm0, %ecx -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: divl %ecx -; X86-AVX1-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-AVX1-NEXT: vpextrd $2, %xmm0, %ecx -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: divl %ecx -; X86-AVX1-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill -; X86-AVX1-NEXT: vpextrd $1, %xmm0, %ecx -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: divl %ecx -; X86-AVX1-NEXT: movl %edx, (%esp) # 4-byte Spill -; X86-AVX1-NEXT: vmovd %xmm0, %ecx -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: divl %ecx -; X86-AVX1-NEXT: movl %edx, %ebp -; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: vpextrd $3, %xmm0, %ecx -; X86-AVX1-NEXT: divl %ecx -; X86-AVX1-NEXT: movl %edx, %ecx -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: vpextrd $2, %xmm0, %esi -; X86-AVX1-NEXT: divl %esi -; X86-AVX1-NEXT: movl %edx, %esi -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edi -; X86-AVX1-NEXT: divl %edi -; X86-AVX1-NEXT: movl %edx, %edi -; X86-AVX1-NEXT: xorl %eax, %eax -; X86-AVX1-NEXT: xorl %edx, %edx -; X86-AVX1-NEXT: vmovd %xmm0, %ebx -; X86-AVX1-NEXT: divl %ebx -; X86-AVX1-NEXT: vmovd %edx, %xmm0 -; X86-AVX1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 -; 
X86-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovd %ebp, %xmm1 -; X86-AVX1-NEXT: vpinsrd $1, (%esp), %xmm1, %xmm1 # 4-byte Folded Reload -; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 # 4-byte Folded Reload -; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 # 4-byte Folded Reload -; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm2 # 4-byte Folded Reload -; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero -; X86-AVX1-NEXT: movl $8199, %eax # imm = 0x2007 -; X86-AVX1-NEXT: vmovd %eax, %xmm3 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [8199,8199,8199,8199] -; X86-AVX1-NEXT: vpmaddwd %xmm4, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpmaddwd %xmm4, %xmm1, %xmm1 -; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1 -; X86-AVX1-NEXT: vmovd %xmm1, (%eax) -; X86-AVX1-NEXT: vmovaps %ymm0, (%eax) -; X86-AVX1-NEXT: addl $16, %esp -; X86-AVX1-NEXT: .cfi_def_cfa_offset 20 -; X86-AVX1-NEXT: popl %esi -; X86-AVX1-NEXT: .cfi_def_cfa_offset 16 -; X86-AVX1-NEXT: popl %edi -; X86-AVX1-NEXT: .cfi_def_cfa_offset 12 -; X86-AVX1-NEXT: popl %ebx -; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX1-NEXT: popl %ebp -; X86-AVX1-NEXT: .cfi_def_cfa_offset 4 -; X86-AVX1-NEXT: vzeroupper -; X86-AVX1-NEXT: retl +; X86-AVX-LABEL: PR34947: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-AVX-NEXT: vmovaps %ymm0, (%eax) +; X86-AVX-NEXT: movl $0, (%eax) +; X86-AVX-NEXT: vzeroupper +; X86-AVX-NEXT: retl ; -; X86-AVX2-LABEL: PR34947: -; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: pushl %esi -; X86-AVX2-NEXT: .cfi_def_cfa_offset 8 -; X86-AVX2-NEXT: .cfi_offset %esi, -8 -; X86-AVX2-NEXT: vmovdqa (%eax), %ymm0 -; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X86-AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %ecx -; X86-AVX2-NEXT: movl %edx, %ecx -; X86-AVX2-NEXT: vmovd %xmm1, %esi -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %esi -; X86-AVX2-NEXT: vmovd %edx, %xmm2 -; X86-AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; X86-AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %ecx -; X86-AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; X86-AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %ecx -; X86-AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm1 -; X86-AVX2-NEXT: vpextrd $1, %xmm0, %ecx -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %ecx -; X86-AVX2-NEXT: movl %edx, %ecx -; X86-AVX2-NEXT: vmovd %xmm0, %esi -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %esi -; X86-AVX2-NEXT: vmovd %edx, %xmm2 -; X86-AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; X86-AVX2-NEXT: vpextrd $2, %xmm0, %ecx -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %ecx -; X86-AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; X86-AVX2-NEXT: vpextrd $3, %xmm0, %ecx -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl %ecx -; X86-AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 -; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; X86-AVX2-NEXT: xorl %eax, %eax -; X86-AVX2-NEXT: xorl %edx, %edx -; X86-AVX2-NEXT: divl (%eax) -; X86-AVX2-NEXT: vmovd %edx, %xmm1 -; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199] -; X86-AVX2-NEXT: 
vpmaddwd %ymm2, %ymm0, %ymm0 -; X86-AVX2-NEXT: movl $8199, %eax # imm = 0x2007 -; X86-AVX2-NEXT: vmovd %eax, %xmm2 -; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; X86-AVX2-NEXT: vmovd %xmm1, (%eax) -; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax) -; X86-AVX2-NEXT: popl %esi -; X86-AVX2-NEXT: .cfi_def_cfa_offset 4 -; X86-AVX2-NEXT: vzeroupper -; X86-AVX2-NEXT: retl -; ; X64-SSE-LABEL: PR34947: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movdqa (%rax), %xmm0 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] -; X64-SSE-NEXT: movd %xmm1, %ecx -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %ecx -; X64-SSE-NEXT: movd %edx, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] -; X64-SSE-NEXT: movd %xmm2, %ecx -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %ecx -; X64-SSE-NEXT: movd %edx, %xmm2 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %ecx -; X64-SSE-NEXT: movd %edx, %xmm1 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; X64-SSE-NEXT: movd %xmm0, %ecx -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl %ecx -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; X64-SSE-NEXT: xorl %eax, %eax -; X64-SSE-NEXT: xorl %edx, %edx -; X64-SSE-NEXT: divl (%rax) -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pmaddwd {{.*}}(%rip), %xmm1 -; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007 -; X64-SSE-NEXT: movd %eax, %xmm2 -; X64-SSE-NEXT: pmuludq %xmm0, %xmm2 -; X64-SSE-NEXT: movd %xmm2, (%rax) -; X64-SSE-NEXT: movdqa %xmm1, (%rax) +; X64-SSE-NEXT: xorps %xmm0, %xmm0 +; X64-SSE-NEXT: movaps %xmm0, (%rax) +; X64-SSE-NEXT: movl $0, (%rax) ; X64-SSE-NEXT: retq ; -; X64-AVX1-LABEL: PR34947: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: pushq %rbp -; X64-AVX1-NEXT: .cfi_def_cfa_offset 16 -; X64-AVX1-NEXT: pushq %rbx -; X64-AVX1-NEXT: .cfi_def_cfa_offset 24 -; X64-AVX1-NEXT: .cfi_offset %rbx, -24 -; X64-AVX1-NEXT: .cfi_offset %rbp, -16 -; X64-AVX1-NEXT: vmovdqa (%rax), %ymm0 -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl (%rax) -; X64-AVX1-NEXT: movl %edx, %r8d -; X64-AVX1-NEXT: vpextrd $3, %xmm0, %ecx -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ecx -; X64-AVX1-NEXT: movl %edx, %r9d -; X64-AVX1-NEXT: vpextrd $2, %xmm0, %ecx -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ecx -; X64-AVX1-NEXT: movl %edx, %r10d -; X64-AVX1-NEXT: vpextrd $1, %xmm0, %ecx -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ecx -; X64-AVX1-NEXT: movl %edx, %r11d -; X64-AVX1-NEXT: vmovd %xmm0, %ecx -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ecx -; X64-AVX1-NEXT: movl %edx, %esi -; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X64-AVX1-NEXT: vpextrd $3, %xmm0, %ecx -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ecx -; X64-AVX1-NEXT: movl %edx, %edi -; X64-AVX1-NEXT: vpextrd $2, %xmm0, %ecx -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ecx -; X64-AVX1-NEXT: movl %edx, %ecx -; X64-AVX1-NEXT: vpextrd $1, %xmm0, %ebx -; X64-AVX1-NEXT: xorl %eax, %eax 
-; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ebx -; X64-AVX1-NEXT: movl %edx, %ebx -; X64-AVX1-NEXT: vmovd %xmm0, %ebp -; X64-AVX1-NEXT: xorl %eax, %eax -; X64-AVX1-NEXT: xorl %edx, %edx -; X64-AVX1-NEXT: divl %ebp -; X64-AVX1-NEXT: vmovd %edx, %xmm0 -; X64-AVX1-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8199,8199,8199,8199] -; X64-AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovd %esi, %xmm2 -; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2 -; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2 -; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2 -; X64-AVX1-NEXT: vpmaddwd %xmm1, %xmm2, %xmm1 -; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X64-AVX1-NEXT: vmovd %r8d, %xmm1 -; X64-AVX1-NEXT: movl $8199, %eax # imm = 0x2007 -; X64-AVX1-NEXT: vmovd %eax, %xmm2 -; X64-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1 -; X64-AVX1-NEXT: vmovd %xmm1, (%rax) -; X64-AVX1-NEXT: vmovaps %ymm0, (%rax) -; X64-AVX1-NEXT: popq %rbx -; X64-AVX1-NEXT: .cfi_def_cfa_offset 16 -; X64-AVX1-NEXT: popq %rbp -; X64-AVX1-NEXT: .cfi_def_cfa_offset 8 -; X64-AVX1-NEXT: vzeroupper -; X64-AVX1-NEXT: retq -; -; X64-AVX2-LABEL: PR34947: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vmovdqa (%rax), %ymm0 -; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; X64-AVX2-NEXT: vpextrd $1, %xmm1, %ecx -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %ecx -; X64-AVX2-NEXT: movl %edx, %ecx -; X64-AVX2-NEXT: vmovd %xmm1, %esi -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %esi -; X64-AVX2-NEXT: vmovd %edx, %xmm2 -; X64-AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; X64-AVX2-NEXT: vpextrd $2, %xmm1, %ecx -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %ecx -; X64-AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; X64-AVX2-NEXT: vpextrd $3, %xmm1, %ecx -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %ecx -; X64-AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm1 -; X64-AVX2-NEXT: vpextrd $1, %xmm0, %ecx -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %ecx -; X64-AVX2-NEXT: movl %edx, %ecx -; X64-AVX2-NEXT: vmovd %xmm0, %esi -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %esi -; X64-AVX2-NEXT: vmovd %edx, %xmm2 -; X64-AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 -; X64-AVX2-NEXT: vpextrd $2, %xmm0, %ecx -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %ecx -; X64-AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 -; X64-AVX2-NEXT: vpextrd $3, %xmm0, %ecx -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl %ecx -; X64-AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0 -; X64-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; X64-AVX2-NEXT: xorl %eax, %eax -; X64-AVX2-NEXT: xorl %edx, %edx -; X64-AVX2-NEXT: divl (%rax) -; X64-AVX2-NEXT: vmovd %edx, %xmm1 -; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199] -; X64-AVX2-NEXT: vpmaddwd %ymm2, %ymm0, %ymm0 -; X64-AVX2-NEXT: movl $8199, %eax # imm = 0x2007 -; X64-AVX2-NEXT: vmovd %eax, %xmm2 -; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vmovd %xmm1, (%rax) -; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax) -; X64-AVX2-NEXT: vzeroupper -; X64-AVX2-NEXT: retq +; X64-AVX-LABEL: PR34947: +; X64-AVX: # %bb.0: +; 
X64-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX-NEXT: vmovaps %ymm0, (%rax)
+; X64-AVX-NEXT: movl $0, (%rax)
+; X64-AVX-NEXT: vzeroupper
+; X64-AVX-NEXT: retq
   %tmp = load <9 x i32>, <9 x i32>* undef, align 64
   %rem = urem <9 x i32> zeroinitializer, %tmp
   %mul = mul <9 x i32> , %rem
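Closing note (illustrative example, not part of the diff): because the new check uses isConstOrConstSplat, an all-zeros vector numerator folds the same way as a scalar zero, which is what collapses the PR34947 expansion above into plain zero stores. A minimal standalone sketch in the same spirit (function name is hypothetical):

  define <4 x i32> @urem_zero_splat(<4 x i32> %x) {
    ; zeroinitializer is recognized as a zero splat, so the whole urem folds
    ; to <4 x i32> zeroinitializer; on x86 llc now emits a single xorps/vxorps
    ; instead of four scalar divide/remainder sequences.
    %r = urem <4 x i32> zeroinitializer, %x
    ret <4 x i32> %r
  }

This matches the updated combine_vec_urem_zero checks in test/CodeGen/X86/combine-urem.ll.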