Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -4484,10 +4484,11 @@ DAGCombinerInfo &DCI, const SDLoc &DL, SmallVectorImpl &Created) const { // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) - // - D must be constant, bigger than 1, with D = D0 * 2^K where D0 is odd + // - D must be constant, with D = D0 * 2^K where D0 is odd // - P is the multiplicative inverse of D0 modulo 2^W // - Q = floor((2^W - 1) / D0) // where W is the width of the common type of N and D. + // Normally, the fold is invalid for D = 1, but we can still special-case it. assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Only applicable for (in)equality comparisons."); @@ -4507,16 +4508,43 @@ if (!CompTarget || !CompTarget->isNullValue()) return SDValue(); + bool AllDivisorsAreOnes = true; bool HadEvenDivisor = false; bool AllDivisorsArePowerOfTwo = true; SmallVector PAmts, KAmts, QAmts; auto BuildUREMPattern = [&](ConstantSDNode *C) { - // This fold is only valid for D > 1. (power-of-two is ok.) - if (C->isNullValue() || C->isOne()) + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) return false; const APInt &D = C->getAPIntValue(); + unsigned W = D.getBitWidth(); + + // No integer exists for which there is remainder after division by 1. + // Incidentally, this fold is only valid for D u> 1. (power-of-two is ok.) + // While we could just bailout and let it constant-fold for scalars and + // splat vectors, we *do* want to handle non-splat vectors where at least + // one divisor happens to be '1'. + if (C->isOne()) { + // We will end up with the following: + // (setule/setugt (rotr (mul N, P), K), Q) + // We know that (X u% 1) == 0 can be constant-folded to 1, + // (X u% 1) != 0 can be constant-folded to 0, + // Also, we know that X u<= -1 can be constant-folded to 1, + // X u> -1 can be constant-folded to 0, + // Therefore, for given new DAG nodes and comparison predicates (ule/ugt), + // we will still produce the correct answer if: Q is a all-ones constant; + // and both P and K are *anything* other than undef, let's pick 0, + // because there's no point in rotating, and because (X * 0) most + // obviously "discards" of the value of X. + PAmts.push_back(DAG.getConstant(APInt::getNullValue(W), DL, SVT)); + KAmts.push_back(DAG.getConstant( + APInt::getNullValue(ShSVT.getSizeInBits()), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(APInt::getAllOnesValue(W), DL, SVT)); + return true; + } + AllDivisorsAreOnes = false; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); @@ -4530,7 +4558,6 @@ // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. - unsigned W = D.getBitWidth(); APInt P = D0.zext(W + 1) .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); @@ -4553,6 +4580,10 @@ if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) return SDValue(); + // If this is a urem by a one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); + // If this is a urem by a powers-of-two, avoid the fold since it can be // best implemented as a bit test. if (AllDivisorsArePowerOfTwo) Index: test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll =================================================================== --- test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -284,21 +284,11 @@ ; CHECK-LABEL: test_urem_odd_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: adrp x9, .LCPI10_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: adrp x8, .LCPI10_1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_1] -; CHECK-NEXT: adrp x8, .LCPI10_2 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI10_2] -; CHECK-NEXT: adrp x8, .LCPI10_3 -; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s -; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_3] -; CHECK-NEXT: neg v2.4s, v2.4s -; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s -; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b -; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI10_1] +; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -465,21 +455,11 @@ ; CHECK-LABEL: test_urem_odd_allones_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: adrp x9, .LCPI16_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: adrp x8, .LCPI16_1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_1] -; CHECK-NEXT: adrp x8, .LCPI16_2 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_2] -; CHECK-NEXT: adrp x8, .LCPI16_3 -; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s -; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_3] -; CHECK-NEXT: neg v2.4s, v2.4s -; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s -; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b -; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_1] +; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret Index: test/CodeGen/X86/urem-seteq-vec-nonsplat.ll =================================================================== --- test/CodeGen/X86/urem-seteq-vec-nonsplat.ll +++ test/CodeGen/X86/urem-seteq-vec-nonsplat.ll @@ -962,100 +962,33 @@ ; CHECK-SSE2-NEXT: movl $-858993459, %eax # imm = 0xCCCCCCCD ; CHECK-SSE2-NEXT: movd %eax, %xmm1 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3] -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] -; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; CHECK-SSE2-NEXT: psrld $2, %xmm1 -; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 -; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0] -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2] -; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] -; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 -; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; CHECK-SSE2-NEXT: psrld $31, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-SSE41-LABEL: test_urem_odd_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: movl $-858993459, %eax # imm = 0xCCCCCCCD -; CHECK-SSE41-NEXT: movd %eax, %xmm1 -; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 -; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm2 -; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; CHECK-SSE41-NEXT: psrld $2, %xmm2 -; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7] -; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2 -; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0 -; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 +; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,858993459,4294967295,858993459] +; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 ; CHECK-SSE41-NEXT: psrld $31, %xmm0 ; CHECK-SSE41-NEXT: retq ; -; CHECK-AVX1-LABEL: test_urem_odd_one: -; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: movl $-858993459, %eax # imm = 0xCCCCCCCD -; CHECK-AVX1-NEXT: vmovd %eax, %xmm1 -; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2 -; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] -; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-AVX1-NEXT: retq -; -; CHECK-AVX2-LABEL: test_urem_odd_one: -; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837] -; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 -; CHECK-AVX2-NEXT: movl $-858993459, %eax # imm = 0xCCCCCCCD -; CHECK-AVX2-NEXT: vmovd %eax, %xmm2 -; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 -; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] -; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 -; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: retq -; -; CHECK-AVX512VL-LABEL: test_urem_odd_one: -; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: movl $-858993459, %eax # imm = 0xCCCCCCCD -; CHECK-AVX512VL-NEXT: vmovd %eax, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: retq +; CHECK-AVX-LABEL: test_urem_odd_one: +; CHECK-AVX: # %bb.0: +; CHECK-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 +; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 +; CHECK-AVX-NEXT: retq %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, %ret = zext <4 x i1> %cmp to <4 x i32> @@ -1158,20 +1091,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_even_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm3, %xmm2, %xmm2 -; CHECK-AVX512VL-NEXT: movl $-1840700269, %eax # imm = 0x92492493 -; CHECK-AVX512VL-NEXT: vmovd %eax, %xmm3 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -1286,19 +1208,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_odd_even_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,0,1374389535] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -1650,113 +1562,35 @@ define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind { ; CHECK-SSE2-LABEL: test_urem_odd_allones_and_one: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,3435973837] -; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2 -; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] +; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,4294967295,0,3435973837] +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] +; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] -; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1 -; CHECK-SSE2-NEXT: psrld $2, %xmm1 -; CHECK-SSE2-NEXT: psrld $31, %xmm2 -; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3 -; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3] -; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [5,4294967295,1,5] -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3] -; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3] -; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0] -; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm5 -; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm1[3,0] -; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm5[0,2] -; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2 -; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 -; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; CHECK-SSE2-NEXT: psrld $31, %xmm0 +; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1 +; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-SSE41-LABEL: test_urem_odd_allones_and_one: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,3435973837] -; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3 -; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1 -; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7] -; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2 -; CHECK-SSE41-NEXT: psrld $31, %xmm2 -; CHECK-SSE41-NEXT: psrld $2, %xmm1 -; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] -; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] -; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1 -; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0 -; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1 +; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993459,1,4294967295,858993459] +; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0 ; CHECK-SSE41-NEXT: psrld $31, %xmm0 ; CHECK-SSE41-NEXT: retq ; -; CHECK-AVX1-LABEL: test_urem_odd_allones_and_one: -; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,3435973837] -; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 -; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2 -; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7] -; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7] -; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-AVX1-NEXT: retq -; -; CHECK-AVX2-LABEL: test_urem_odd_allones_and_one: -; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,3435973837] -; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 -; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-AVX2-NEXT: retq -; -; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_one: -; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,3435973837] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: retq +; CHECK-AVX-LABEL: test_urem_odd_allones_and_one: +; CHECK-AVX: # %bb.0: +; CHECK-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 +; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0 +; CHECK-AVX-NEXT: retq %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, %ret = zext <4 x i1> %cmp to <4 x i32> @@ -1869,19 +1703,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -1992,18 +1816,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_odd_even_allones_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,0,1374389535] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -2107,18 +1922,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_odd_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,3435973837] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -2230,19 +2036,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_even_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,268435456,0,2454267027] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm4, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -2349,18 +2145,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_odd_even_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,268435456,0,1374389535] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -2467,18 +2254,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_odd_allones_and_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2147483649,268435456,0] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq @@ -2594,21 +2372,9 @@ ; ; CHECK-AVX512VL-LABEL: test_urem_even_allones_and_poweroftwo_and_one: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: movl $1, %eax -; CHECK-AVX512VL-NEXT: vmovd %eax, %xmm1 -; CHECK-AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [2454267027,2147483649,268435456,0] -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpmuludq %xmm4, %xmm2, %xmm2 -; CHECK-AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3] -; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1 -; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 -; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: retq