diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5612,17 +5612,16 @@
          "Operand is DELETED_NODE!");
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
-  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
 
   // Canonicalize constant to RHS if commutative.
   if (TLI->isCommutativeBinOp(Opcode)) {
-    if (N1C && !N2C) {
-      std::swap(N1C, N2C);
-      std::swap(N1, N2);
-    } else if (N1CFP && !N2CFP) {
-      std::swap(N1CFP, N2CFP);
+    bool IsN1C = N1C || isConstantIntBuildVectorOrConstantInt(N1);
+    bool IsN2C = N2C || isConstantIntBuildVectorOrConstantInt(N2);
+    bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+    bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+    if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) {
       std::swap(N1, N2);
+      std::swap(N1C, N2C);
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
@@ -126,7 +126,8 @@
 define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
 ; CHECK-LABEL: in_constant_mone_vary:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: orr v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: bic v0.16b, v2.16b, v1.16b
+; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
   %n1 = and <4 x i32> %n0, %mask
@@ -152,8 +153,9 @@
 define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
 ; CHECK-LABEL: in_constant_mone_vary_invmask:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: orn v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: mvn v0.16b, v1.16b
+; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
   %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
   %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -13,10 +13,10 @@
 ; CHECK-NEXT: xvredp 2, 0
 ; CHECK-NEXT: xxswapd 1, 1
 ; CHECK-NEXT: xxlor 3, 1, 1
-; CHECK-NEXT: xvmaddadp 3, 0, 2
-; CHECK-NEXT: xvnmsubadp 2, 2, 3
-; CHECK-NEXT: xvmaddadp 1, 0, 2
-; CHECK-NEXT: xvmsubadp 2, 2, 1
+; CHECK-NEXT: xvnmsubadp 3, 0, 2
+; CHECK-NEXT: xvmaddadp 2, 2, 3
+; CHECK-NEXT: xvnmsubadp 1, 0, 2
+; CHECK-NEXT: xvnmaddadp 2, 2, 1
 ; CHECK-NEXT: xvmuldp 34, 34, 2
 ; CHECK-NEXT: xvmuldp 35, 35, 2
 ; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -36,9 +36,9 @@
 ; CHECK-NEXT: lvx 4, 0, 3
 ; CHECK-NEXT: xxspltw 0, 0, 0
 ; CHECK-NEXT: xvresp 1, 0
-; CHECK-NEXT: xvnmsubasp 35, 0, 1
+; CHECK-NEXT: xvmaddasp 35, 0, 1
 ; CHECK-NEXT: xvmulsp 0, 34, 36
-; CHECK-NEXT: xvmaddasp 1, 1, 35
+; CHECK-NEXT: xvnmsubasp 1, 1, 35
 ; CHECK-NEXT: xvmulsp 34, 0, 1
 ; CHECK-NEXT: blr
   %ins = insertelement <4 x float> undef, float %a, i32 0
diff --git a/llvm/test/CodeGen/X86/dpbusd_const.ll b/llvm/test/CodeGen/X86/dpbusd_const.ll
--- a/llvm/test/CodeGen/X86/dpbusd_const.ll
+++ b/llvm/test/CodeGen/X86/dpbusd_const.ll
@@ -68,8 +68,7 @@
 ; AVXVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVXVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVXVNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVXVNNI-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVXVNNI-NEXT: {vex} vpdpbusd %xmm0, %xmm2, %xmm1
+; AVXVNNI-NEXT: {vex} vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVXVNNI-NEXT: vmovd %xmm1, %eax
 ; AVXVNNI-NEXT: addl %edi, %eax
 ; AVXVNNI-NEXT: retq
@@ -80,10 +79,9 @@
 ; AVX512VNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VNNI-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX512VNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVX512VNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VNNI-NEXT: vpdpbusd %zmm0, %zmm1, %zmm2
-; AVX512VNNI-NEXT: vmovd %xmm2, %eax
+; AVX512VNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; AVX512VNNI-NEXT: vmovd %xmm1, %eax
 ; AVX512VNNI-NEXT: addl %edi, %eax
 ; AVX512VNNI-NEXT: vzeroupper
 ; AVX512VNNI-NEXT: retq
@@ -92,10 +90,9 @@
 ; AVX512VLVNNI: # %bb.0: # %entry
 ; AVX512VLVNNI-NEXT: vpmovdb %xmm0, %xmm0
 ; AVX512VLVNNI-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLVNNI-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVX512VLVNNI-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVNNI-NEXT: vpdpbusd %xmm0, %xmm1, %xmm2
-; AVX512VLVNNI-NEXT: vmovd %xmm2, %eax
+; AVX512VLVNNI-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VLVNNI-NEXT: vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512VLVNNI-NEXT: vmovd %xmm1, %eax
 ; AVX512VLVNNI-NEXT: addl %edi, %eax
 ; AVX512VLVNNI-NEXT: retq
 entry:
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -1070,7 +1070,7 @@
 ; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; X64-NEXT: vandps %xmm1, %xmm0, %xmm1
 ; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; X64-NEXT: vorps %xmm1, %xmm2, %xmm1
+; X64-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; X64-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; X64-NEXT: retq
@@ -1081,7 +1081,7 @@
 ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; X86-NEXT: vandps %xmm1, %xmm0, %xmm1
 ; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; X86-NEXT: vorps %xmm1, %xmm2, %xmm1
+; X86-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; X86-NEXT: vmovss %xmm0, (%esp)
@@ -1099,7 +1099,7 @@
 ; X64-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; X64-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X64-NEXT: # xmm2 = mem[0,0]
-; X64-NEXT: vorpd %xmm1, %xmm2, %xmm1
+; X64-NEXT: vorpd %xmm2, %xmm1, %xmm1
 ; X64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
 ; X64-NEXT: vzeroupper
@@ -1114,7 +1114,7 @@
 ; X86-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
 ; X86-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X86-NEXT: # xmm2 = mem[0,0]
-; X86-NEXT: vorpd %xmm1, %xmm2, %xmm1
+; X86-NEXT: vorpd %xmm2, %xmm1, %xmm1
 ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
 ; X86-NEXT: vmovsd %xmm0, (%esp)
diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll
--- a/llvm/test/CodeGen/X86/fp-round.ll
+++ b/llvm/test/CodeGen/X86/fp-round.ll
@@ -41,7 +41,7 @@
 ; AVX1-NEXT: callq ___extendhfsf2
 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: callq ___truncsfhf2
@@ -94,7 +94,7 @@
 ; AVX1: ## %bb.0:
 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT: vorps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vorps %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: retq
@@ -130,7 +130,7 @@
 ; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; AVX1-NEXT: ## xmm2 = mem[0,0]
-; AVX1-NEXT: vorpd %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vorpd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT: retq
@@ -521,11 +521,11 @@
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm3
 ; AVX1-NEXT: vmovaps {{.*#+}} ymm4 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT: vorps %ymm3, %ymm4, %ymm3
+; AVX1-NEXT: vorps %ymm4, %ymm3, %ymm3
 ; AVX1-NEXT: vaddps %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT: vroundps $11, %ymm0, %ymm0
 ; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm2
-; AVX1-NEXT: vorps %ymm2, %ymm4, %ymm2
+; AVX1-NEXT: vorps %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT: vroundps $11, %ymm1, %ymm1
 ; AVX1-NEXT: retq
@@ -620,11 +620,11 @@
 ; AVX1-NEXT: vmovapd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX1-NEXT: vandpd %ymm2, %ymm0, %ymm3
 ; AVX1-NEXT: vmovapd {{.*#+}} ymm4 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX1-NEXT: vorpd %ymm3, %ymm4, %ymm3
+; AVX1-NEXT: vorpd %ymm4, %ymm3, %ymm3
 ; AVX1-NEXT: vaddpd %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
 ; AVX1-NEXT: vandpd %ymm2, %ymm1, %ymm2
-; AVX1-NEXT: vorpd %ymm2, %ymm4, %ymm2
+; AVX1-NEXT: vorpd %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT: vroundpd $11, %ymm1, %ymm1
 ; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -1326,7 +1326,7 @@
 ; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT: vmovddup {{.*#+}} xmm1 = [+Inf,+Inf]
 ; X64-AVX-NEXT: # xmm1 = mem[0,0]
-; X64-AVX-NEXT: vorps %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT: callq __extenddftf2@PLT
 ; X64-AVX-NEXT: addq $8, %rsp
 ; X64-AVX-NEXT: .LBB26_2: # %cleanup
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -465,9 +465,9 @@
 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
-; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -491,9 +491,9 @@
 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
-; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
@@ -611,9 +611,9 @@
 ; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm2, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3
-; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
@@ -637,9 +637,9 @@
 ; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm2, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3
-; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-SSE2-NEXT: pand %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/pr43509.ll b/llvm/test/CodeGen/X86/pr43509.ll
--- a/llvm/test/CodeGen/X86/pr43509.ll
+++ b/llvm/test/CodeGen/X86/pr43509.ll
@@ -4,12 +4,10 @@
 define <8 x i8> @foo(<8 x float> %arg) {
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0: # %bb
-; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
-; CHECK-NEXT: vpmovm2b %k0, %xmm1
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpltps %ymm2, %ymm0, %k1
+; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpltps %ymm1, %ymm0, %k1
+; CHECK-NEXT: vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
 ; CHECK-NEXT: vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
-; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 bb:
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -336,23 +336,26 @@
 ; CHECK-SSE1-LABEL: in_constant_mone_vary:
 ; CHECK-SSE1: # %bb.0:
 ; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
+; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
 ; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
-; CHECK-SSE1-NEXT: orps %xmm0, %xmm1
+; CHECK-SSE1-NEXT: andnps (%rcx), %xmm1
+; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi)
 ; CHECK-SSE1-NEXT: retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary:
 ; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movaps (%rsi), %xmm0
-; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
+; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1
+; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0
+; CHECK-SSE2-NEXT: andnps (%rdx), %xmm0
+; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0
 ; CHECK-SSE2-NEXT: retq
 ;
 ; CHECK-XOP-LABEL: in_constant_mone_vary:
 ; CHECK-XOP: # %bb.0:
 ; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0
-; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
+; CHECK-XOP-NEXT: vandnps (%rdx), %xmm0, %xmm1
+; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0
 ; CHECK-XOP-NEXT: retq
   %x = load <4 x i32>, <4 x i32> *%px, align 16
   %y = load <4 x i32>, <4 x i32> *%py, align 16
@@ -408,30 +411,32 @@
 ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-SSE1: # %bb.0:
 ; CHECK-SSE1-NEXT: movq %rdi, %rax
-; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
-; CHECK-SSE1-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1
-; CHECK-SSE1-NEXT: andps (%rdx), %xmm0
-; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
-; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
+; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0
+; CHECK-SSE1-NEXT: movaps (%rcx), %xmm1
+; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE1-NEXT: movaps %xmm0, %xmm2
+; CHECK-SSE1-NEXT: andnps %xmm1, %xmm2
+; CHECK-SSE1-NEXT: xorps %xmm0, %xmm2
+; CHECK-SSE1-NEXT: movaps %xmm2, (%rdi)
 ; CHECK-SSE1-NEXT: retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm0
-; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
-; CHECK-SSE2-NEXT: pand (%rsi), %xmm0
-; CHECK-SSE2-NEXT: por %xmm1, %xmm0
+; CHECK-SSE2-NEXT: movdqa (%rsi), %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-SSE2-NEXT: pxor (%rdx), %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
 ; CHECK-SSE2-NEXT: retq
 ;
 ; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm0
+; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0
 ; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm1
-; CHECK-XOP-NEXT: vpand (%rsi), %xmm0, %xmm0
-; CHECK-XOP-NEXT: vpor %xmm0, %xmm1, %xmm0
+; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm1
+; CHECK-XOP-NEXT: vpandn %xmm1, %xmm0, %xmm1
+; CHECK-XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0
 ; CHECK-XOP-NEXT: retq
   %x = load <4 x i32>, <4 x i32> *%px, align 16
   %y = load <4 x i32>, <4 x i32> *%py, align 16
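
Note (illustration, not part of the patch): the SelectionDAG.cpp hunk above widens the constant-to-RHS canonicalization for commutative ops from scalar ConstantSDNode/ConstantFPSDNode operands to any constant integer or FP build vector, which is why the vorps/vorpd/pmuludq operand order flips in the X86 checks and constant loads such as the vpdpbusd source can now fold as memory operands. Below is a minimal standalone C++ sketch of the swap rule only; Node and its flags are simplified placeholders, not LLVM types.

// sketch.cpp -- simplified model of the canonicalization rule.
#include <cassert>
#include <utility>

struct Node {
  bool IsConstInt; // stands in for isConstantIntBuildVectorOrConstantInt()
  bool IsConstFP;  // stands in for isConstantFPBuildVectorOrConstantFP()
};

// For a commutative binary op, move a constant LHS to the RHS so later folds
// only have to look for constants on one side.
void canonicalizeConstantToRHS(bool IsCommutative, Node *&N1, Node *&N2) {
  if (!IsCommutative)
    return;
  bool IsN1C = N1->IsConstInt, IsN2C = N2->IsConstInt;
  bool IsN1CFP = N1->IsConstFP, IsN2CFP = N2->IsConstFP;
  if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
    std::swap(N1, N2);
}

int main() {
  Node C{true, false};  // constant operand (e.g. a build_vector of i32 1s)
  Node X{false, false}; // non-constant operand
  Node *L = &C, *R = &X;
  canonicalizeConstantToRHS(/*IsCommutative=*/true, L, R);
  assert(L == &X && R == &C); // the constant ended up on the RHS
  return 0;
}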