Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h
@@ -2407,6 +2407,12 @@
     return false;
   }
 
+  /// Try to convert an extract element of a vector binary operation into an
+  /// extract element followed by a scalar operation.
+  virtual bool shouldScalarizeBinop(SDValue VecOp) const {
+    return false;
+  }
+
   // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
   // even if the vector itself has multiple uses.
   virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -915,9 +915,11 @@
 
 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 // undef's.
-static bool isAnyConstantBuildVector(const SDNode *N) {
-  return ISD::isBuildVectorOfConstantSDNodes(N) ||
-         ISD::isBuildVectorOfConstantFPSDNodes(N);
+static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
+  if (V.getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+  return isConstantOrConstantVector(V, NoOpaques) ||
+         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 }
 
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -15580,6 +15582,40 @@
   return SDValue(EVE, 0);
 }
 
+/// Transform a vector binary operation into a scalar binary operation by moving
+/// the math/logic after an extract element of a vector.
+static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
+                                       bool LegalOperations) {
+  SDValue Vec = ExtElt->getOperand(0);
+  SDValue Index = ExtElt->getOperand(1);
+  auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+  if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+    return SDValue();
+
+  // Targets may want to avoid this to prevent an expensive register transfer.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.shouldScalarizeBinop(Vec))
+    return SDValue();
+
+  // Extracting an element of a vector constant is constant-folded, so this
+  // transform is just replacing a vector op with a scalar op while moving the
+  // extract.
+  SDValue Op0 = Vec.getOperand(0);
+  SDValue Op1 = Vec.getOperand(1);
+  if (isAnyConstantBuildVector(Op0, true) ||
+      isAnyConstantBuildVector(Op1, true)) {
+    // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
+    // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
+    SDLoc DL(ExtElt);
+    EVT VT = ExtElt->getValueType(0);
+    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
+    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
+    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
+  }
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue VecOp = N->getOperand(0);
   SDValue Index = N->getOperand(1);
@@ -15670,6 +15706,9 @@
     }
   }
 
+  if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
+    return BO;
+
   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
@@ -17055,8 +17094,8 @@
   if (!N1->hasOneUse())
     return SDValue();
 
-  bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
-  bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+  bool N0AnyConst = isAnyConstantBuildVector(N0);
+  bool N1AnyConst = isAnyConstantBuildVector(N1);
   if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
     return SDValue();
   if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -1039,6 +1039,11 @@
     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                  unsigned Index) const override;
 
+    /// Scalar ops always have equal or better analysis/performance/power than
+    /// the vector equivalent, so this always makes sense if the scalar op is
+    /// supported.
+    bool shouldScalarizeBinop(SDValue) const override;
+
     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                       unsigned AddrSpace) const override {
       // If we can replace more than 2 scalar stores, there will be a reduction
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -4875,6 +4875,18 @@
   return (Index % ResVT.getVectorNumElements()) == 0;
 }
 
+bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
+  // If the vector op is not supported, try to convert to scalar.
+  EVT VecVT = VecOp.getValueType();
+  if (!isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), VecVT))
+    return true;
+
+  // If the vector op is supported, but the scalar op is not, the transform may
+  // not be worthwhile.
+  EVT ScalarVT = VecVT.getScalarType();
+  return isOperationLegalOrCustomOrPromote(VecOp.getOpcode(), ScalarVT);
+}
+
 bool X86TargetLowering::isCheapToSpeculateCttz() const {
   // Speculate cttz only if we can directly use TZCNT.
return Subtarget.hasBMI(); Index: llvm/trunk/test/CodeGen/X86/and-load-fold.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/and-load-fold.ll +++ llvm/trunk/test/CodeGen/X86/and-load-fold.ll @@ -8,9 +8,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 -; CHECK-NEXT: pextrw $4, %xmm0, %eax +; CHECK-NEXT: pextrw $2, %xmm0, %eax +; CHECK-NEXT: andb $95, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %Vp = bitcast <4 x i8>* %V to <3 x i8>* Index: llvm/trunk/test/CodeGen/X86/extract-fp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/extract-fp.ll +++ llvm/trunk/test/CodeGen/X86/extract-fp.ll @@ -4,8 +4,8 @@ define float @ext_fadd_v4f32(<4 x float> %x) { ; CHECK-LABEL: ext_fadd_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; CHECK-NEXT: addss {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %bo = fadd <4 x float> %x, %ext = extractelement <4 x float> %bo, i32 2 @@ -15,9 +15,9 @@ define float @ext_fsub_v4f32(<4 x float> %x) { ; CHECK-LABEL: ext_fsub_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = -; CHECK-NEXT: subps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 ; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %bo = fsub <4 x float> , %x @@ -28,19 +28,20 @@ define float @ext_fmul_v4f32(<4 x float> %x) { ; CHECK-LABEL: ext_fmul_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] +; CHECK-NEXT: mulss {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %bo = fmul <4 x float> %x, %ext = extractelement <4 x float> %bo, i32 3 ret float %ext } +; TODO: X / 1.0 --> X + define float @ext_fdiv_v4f32(<4 x float> %x) { ; CHECK-LABEL: ext_fdiv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: divps %xmm1, %xmm0 +; CHECK-NEXT: divss {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %bo = fdiv <4 x float> %x, %ext = extractelement <4 x float> %bo, i32 0 @@ -50,9 +51,9 @@ define float @ext_fdiv_v4f32_constant_op0(<4 x float> %x) { ; CHECK-LABEL: ext_fdiv_v4f32_constant_op0: ; CHECK: # %bb.0: -; CHECK-NEXT: movaps {{.*#+}} xmm1 = -; CHECK-NEXT: divps %xmm0, %xmm1 -; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] +; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: divss %xmm0, %xmm1 ; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %bo = fdiv <4 x float> , %x Index: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll +++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll @@ -211,21 +211,19 @@ ; ; X86-SSE42-LABEL: test_reduce_v8i16: ; X86-SSE42: ## %bb.0: -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; 
X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; ; X86-AVX-LABEL: test_reduce_v8i16: ; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX-NEXT: retl ; @@ -244,21 +242,19 @@ ; ; X64-SSE42-LABEL: test_reduce_v8i16: ; X64-SSE42: ## %bb.0: -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; ; X64-AVX-LABEL: test_reduce_v8i16: ; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX-NEXT: retq %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> @@ -309,26 +305,24 @@ ; ; X86-SSE42-LABEL: test_reduce_v16i8: ; X86-SSE42: ## %bb.0: -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; ; X86-AVX-LABEL: test_reduce_v16i8: ; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: xorb $127, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: retl ; @@ -366,26 +360,24 @@ ; ; X64-SSE42-LABEL: test_reduce_v16i8: ; X64-SSE42: ## %bb.0: -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor 
{{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; ; X64-AVX-LABEL: test_reduce_v16i8: ; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: xorb $127, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> @@ -736,11 +728,10 @@ ; X86-SSE42-LABEL: test_reduce_v16i16: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; @@ -748,11 +739,10 @@ ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vmovd %xmm0, %eax +; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -761,11 +751,10 @@ ; X86-AVX2: ## %bb.0: ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vmovd %xmm0, %eax +; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -787,11 +776,10 @@ ; X64-SSE42-LABEL: test_reduce_v16i16: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; @@ -799,11 +787,10 @@ ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[32767,32767,32767,32767,32767,32767,32767,32767] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vmovd %xmm0, %eax +; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -812,11 +799,10 @@ ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -825,11 +811,10 @@ ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vmovd %xmm0, %eax +; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -890,14 +875,13 @@ ; X86-SSE42-LABEL: test_reduce_v32i8: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -905,13 +889,12 @@ ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: xorb $127, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -920,13 +903,12 @@ ; X86-AVX2: ## %bb.0: ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: 
vpsrlw $8, %xmm0, %xmm2 -; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: xorb $127, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -971,14 +953,13 @@ ; X64-SSE42-LABEL: test_reduce_v32i8: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -986,13 +967,12 @@ ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: xorb $127, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1001,13 +981,12 @@ ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: xorb $127, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1016,13 +995,12 @@ ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: xorb $127, 
%al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -1513,11 +1491,10 @@ ; X86-SSE42-NEXT: pmaxsw %xmm3, %xmm1 ; X86-SSE42-NEXT: pmaxsw %xmm2, %xmm0 ; X86-SSE42-NEXT: pmaxsw %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; @@ -1528,11 +1505,10 @@ ; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 ; X86-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vmovd %xmm0, %eax +; X86-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1542,11 +1518,10 @@ ; X86-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vmovd %xmm0, %eax +; X86-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1572,11 +1547,10 @@ ; X64-SSE42-NEXT: pmaxsw %xmm3, %xmm1 ; X64-SSE42-NEXT: pmaxsw %xmm2, %xmm0 ; X64-SSE42-NEXT: pmaxsw %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; @@ -1587,11 +1561,10 @@ ; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 ; X64-AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vmovd %xmm0, %eax +; X64-AVX1-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1601,11 +1574,10 @@ ; X64-AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; 
X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1616,11 +1588,10 @@ ; X64-AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vmovd %xmm0, %eax +; X64-AVX512-NEXT: xorl $32767, %eax ## imm = 0x7FFF ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -1696,14 +1667,13 @@ ; X86-SSE42-NEXT: pmaxsb %xmm3, %xmm1 ; X86-SSE42-NEXT: pmaxsb %xmm2, %xmm0 ; X86-SSE42-NEXT: pmaxsb %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $127, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -1714,13 +1684,12 @@ ; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 ; X86-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: xorb $127, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1730,13 +1699,12 @@ ; X86-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: xorb $127, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1793,14 +1761,13 @@ ; X64-SSE42-NEXT: pmaxsb %xmm3, %xmm1 ; X64-SSE42-NEXT: pmaxsb %xmm2, %xmm0 ; X64-SSE42-NEXT: pmaxsb %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-SSE42-NEXT: pxor 
%xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $127, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -1811,13 +1778,12 @@ ; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 ; X64-AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: xorb $127, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1827,13 +1793,12 @@ ; X64-AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: xorb $127, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1844,13 +1809,12 @@ ; X64-AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: xorb $127, %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll +++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll @@ -213,21 +213,19 @@ ; ; X86-SSE42-LABEL: test_reduce_v8i16: ; X86-SSE42: ## %bb.0: -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, 
%xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; ; X86-AVX-LABEL: test_reduce_v8i16: ; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX-NEXT: retl ; @@ -246,21 +244,19 @@ ; ; X64-SSE42-LABEL: test_reduce_v8i16: ; X64-SSE42: ## %bb.0: -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; ; X64-AVX-LABEL: test_reduce_v8i16: ; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vmovd %xmm0, %eax +; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX-NEXT: retq %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> @@ -311,26 +307,24 @@ ; ; X86-SSE42-LABEL: test_reduce_v16i8: ; X86-SSE42: ## %bb.0: -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; ; X86-AVX-LABEL: test_reduce_v16i8: ; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: xorb $-128, %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: retl ; @@ -368,26 +362,24 @@ ; ; X64-SSE42-LABEL: test_reduce_v16i8: ; X64-SSE42: ## %bb.0: -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: 
phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; ; X64-AVX-LABEL: test_reduce_v16i8: ; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX-NEXT: xorb $-128, %al ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> @@ -740,11 +732,10 @@ ; X86-SSE42-LABEL: test_reduce_v16i16: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pminsw %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; @@ -752,11 +743,10 @@ ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vmovd %xmm0, %eax +; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -765,11 +755,10 @@ ; X86-AVX2: ## %bb.0: ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vmovd %xmm0, %eax +; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -791,11 +780,10 @@ ; X64-SSE42-LABEL: test_reduce_v16i16: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pminsw %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; @@ -803,11 +791,10 @@ ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-AVX1-NEXT: vpminsw %xmm1, 
%xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vmovd %xmm0, %eax +; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -816,11 +803,10 @@ ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -829,11 +815,10 @@ ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vmovd %xmm0, %eax +; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -894,14 +879,13 @@ ; X86-SSE42-LABEL: test_reduce_v32i8: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -909,13 +893,12 @@ ; X86-AVX1: ## %bb.0: ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: xorb $-128, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -924,13 +907,12 @@ ; X86-AVX2: ## %bb.0: ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; 
X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: xorb $-128, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -975,14 +957,13 @@ ; X64-SSE42-LABEL: test_reduce_v32i8: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -990,13 +971,12 @@ ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: xorb $-128, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1005,13 +985,12 @@ ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: xorb $-128, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1020,13 +999,12 @@ ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; 
X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: xorb $-128, %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -1517,11 +1495,10 @@ ; X86-SSE42-NEXT: pminsw %xmm3, %xmm1 ; X86-SSE42-NEXT: pminsw %xmm2, %xmm0 ; X86-SSE42-NEXT: pminsw %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; @@ -1532,11 +1509,10 @@ ; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 ; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vmovd %xmm0, %eax +; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1546,11 +1522,10 @@ ; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vmovd %xmm0, %eax +; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1576,11 +1551,10 @@ ; X64-SSE42-NEXT: pminsw %xmm3, %xmm1 ; X64-SSE42-NEXT: pminsw %xmm2, %xmm0 ; X64-SSE42-NEXT: pminsw %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; @@ -1591,11 +1565,10 @@ ; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 ; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vmovd %xmm0, %eax +; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1605,11 +1578,10 @@ ; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX2-NEXT: 
vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1620,11 +1592,10 @@ ; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vmovd %xmm0, %eax +; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -1700,14 +1671,13 @@ ; X86-SSE42-NEXT: pminsb %xmm3, %xmm1 ; X86-SSE42-NEXT: pminsb %xmm2, %xmm0 ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0 +; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X86-SSE42-NEXT: psrlw $8, %xmm1 +; X86-SSE42-NEXT: pminub %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: xorb $-128, %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -1718,13 +1688,12 @@ ; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: xorb $-128, %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1734,13 +1703,12 @@ ; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: xorb $-128, %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1797,14 +1765,13 @@ ; X64-SSE42-NEXT: pminsb %xmm3, %xmm1 ; X64-SSE42-NEXT: pminsb %xmm2, %xmm0 ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa {{.*#+}} xmm1 = 
[128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0 +; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 +; X64-SSE42-NEXT: psrlw $8, %xmm1 +; X64-SSE42-NEXT: pminub %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: xorb $-128, %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -1815,13 +1782,12 @@ ; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 -; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: xorb $-128, %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1831,13 +1797,12 @@ ; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: xorb $-128, %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1848,13 +1813,12 @@ ; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: xorb $-128, %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll +++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll @@ -240,18 +240,18 @@ ; X86-SSE2-NEXT: psrld $16, %xmm1 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: movd %xmm1, %eax 
+; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE2-NEXT: retl ; ; X86-SSE42-LABEL: test_reduce_v8i16: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: notl %eax ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; @@ -260,8 +260,8 @@ ; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vmovd %xmm0, %eax +; X86-AVX-NEXT: notl %eax ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX-NEXT: retl ; @@ -282,18 +282,18 @@ ; X64-SSE2-NEXT: psrld $16, %xmm1 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE2-NEXT: retq ; ; X64-SSE42-LABEL: test_reduce_v8i16: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: notl %eax ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; @@ -302,8 +302,8 @@ ; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vmovd %xmm0, %eax +; X64-AVX1-NEXT: notl %eax ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX1-NEXT: retq ; @@ -312,8 +312,8 @@ ; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: notl %eax ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: retq ; @@ -321,8 +321,8 @@ ; X64-AVX512: ## %bb.0: ; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vmovd %xmm0, %eax +; X64-AVX512-NEXT: notl %eax ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX512-NEXT: retq %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> @@ -358,13 +358,13 @@ ; X86-SSE42-LABEL: test_reduce_v16i8: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE42-NEXT: psrlw $8, %xmm0 +; X86-SSE42-NEXT: pminub %xmm1, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -372,11 +372,11 @@ ; X86-AVX: ## %bb.0: ; X86-AVX-NEXT: vpcmpeqd %xmm1, %xmm1, 
%xmm1 ; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX-NEXT: notb %al ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX-NEXT: retl ; @@ -399,13 +399,13 @@ ; X64-SSE42-LABEL: test_reduce_v16i8: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE42-NEXT: psrlw $8, %xmm0 +; X64-SSE42-NEXT: pminub %xmm1, %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -413,11 +413,11 @@ ; X64-AVX1: ## %bb.0: ; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: retq ; @@ -425,11 +425,11 @@ ; X64-AVX2: ## %bb.0: ; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: retq ; @@ -439,8 +439,8 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: retq %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> @@ -845,8 +845,8 @@ ; X86-SSE2-NEXT: psrld $16, %xmm1 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE2-NEXT: retl ; @@ -854,10 +854,10 @@ ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: notl %eax ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; @@ -868,8 +868,8 @@ ; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpxor %xmm1, 
%xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vmovd %xmm0, %eax +; X86-AVX1-NEXT: notl %eax ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -881,8 +881,8 @@ ; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vmovd %xmm0, %eax +; X86-AVX2-NEXT: notl %eax ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -908,8 +908,8 @@ ; X64-SSE2-NEXT: psrld $16, %xmm1 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE2-NEXT: retq ; @@ -917,10 +917,10 @@ ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: notl %eax ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; @@ -931,8 +931,8 @@ ; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vmovd %xmm0, %eax +; X64-AVX1-NEXT: notl %eax ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -944,8 +944,8 @@ ; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: notl %eax ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -956,8 +956,8 @@ ; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vmovd %xmm0, %eax +; X64-AVX512-NEXT: notl %eax ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -999,13 +999,13 @@ ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0 ; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE42-NEXT: psrlw $8, %xmm0 +; X86-SSE42-NEXT: pminub %xmm1, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -1015,11 +1015,11 @@ ; X86-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; 
X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: notb %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1030,11 +1030,11 @@ ; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: notb %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1060,13 +1060,13 @@ ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0 ; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE42-NEXT: psrlw $8, %xmm0 +; X64-SSE42-NEXT: pminub %xmm1, %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -1076,11 +1076,11 @@ ; X64-AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1091,11 +1091,11 @@ ; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1108,8 +1108,8 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -1688,8 +1688,8 @@ ; X86-SSE2-NEXT: psrld $16, %xmm1 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; X86-SSE2-NEXT: pxor %xmm4, %xmm1 ; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE2-NEXT: retl ; @@ -1699,10 +1699,10 @@ ; 
X86-SSE42-NEXT: pmaxuw %xmm2, %xmm0 ; X86-SSE42-NEXT: pmaxuw %xmm1, %xmm0 ; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X86-SSE42-NEXT: movd %xmm0, %eax +; X86-SSE42-NEXT: notl %eax ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE42-NEXT: retl ; @@ -1716,8 +1716,8 @@ ; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vmovd %xmm0, %eax +; X86-AVX1-NEXT: notl %eax ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1730,8 +1730,8 @@ ; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vmovd %xmm0, %eax +; X86-AVX2-NEXT: notl %eax ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1761,8 +1761,8 @@ ; X64-SSE2-NEXT: psrld $16, %xmm1 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; X64-SSE2-NEXT: pxor %xmm4, %xmm1 ; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE2-NEXT: retq ; @@ -1772,10 +1772,10 @@ ; X64-SSE42-NEXT: pmaxuw %xmm2, %xmm0 ; X64-SSE42-NEXT: pmaxuw %xmm1, %xmm0 ; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0 ; X64-SSE42-NEXT: movd %xmm0, %eax +; X64-SSE42-NEXT: notl %eax ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE42-NEXT: retq ; @@ -1789,8 +1789,8 @@ ; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vmovd %xmm0, %eax +; X64-AVX1-NEXT: notl %eax ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1803,8 +1803,8 @@ ; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vmovd %xmm0, %eax +; X64-AVX2-NEXT: notl %eax ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1817,8 +1817,8 @@ ; X64-AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vmovd %xmm0, %eax +; X64-AVX512-NEXT: notl %eax ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq @@ -1867,13 +1867,13 @@ ; X86-SSE42-NEXT: pmaxub %xmm2, %xmm0 ; X86-SSE42-NEXT: pmaxub %xmm1, %xmm0 ; X86-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 -; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X86-SSE42-NEXT: psrlw $8, %xmm2 -; X86-SSE42-NEXT: pminub %xmm0, %xmm2 -; X86-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X86-SSE42-NEXT: pxor %xmm1, %xmm0 +; X86-SSE42-NEXT: pxor %xmm0, %xmm1 +; 
X86-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X86-SSE42-NEXT: psrlw $8, %xmm0 +; X86-SSE42-NEXT: pminub %xmm1, %xmm0 +; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X86-SSE42-NEXT: notb %al ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X86-SSE42-NEXT: retl ; @@ -1886,11 +1886,11 @@ ; X86-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX1-NEXT: notb %al ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX1-NEXT: vzeroupper ; X86-AVX1-NEXT: retl @@ -1902,11 +1902,11 @@ ; X86-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X86-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X86-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X86-AVX2-NEXT: notb %al ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl @@ -1936,13 +1936,13 @@ ; X64-SSE42-NEXT: pmaxub %xmm2, %xmm0 ; X64-SSE42-NEXT: pmaxub %xmm1, %xmm0 ; X64-SSE42-NEXT: pcmpeqd %xmm1, %xmm1 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 -; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 -; X64-SSE42-NEXT: psrlw $8, %xmm2 -; X64-SSE42-NEXT: pminub %xmm0, %xmm2 -; X64-SSE42-NEXT: phminposuw %xmm2, %xmm0 -; X64-SSE42-NEXT: pxor %xmm1, %xmm0 +; X64-SSE42-NEXT: pxor %xmm0, %xmm1 +; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 +; X64-SSE42-NEXT: psrlw $8, %xmm0 +; X64-SSE42-NEXT: pminub %xmm1, %xmm0 +; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax +; X64-SSE42-NEXT: notb %al ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax ; X64-SSE42-NEXT: retq ; @@ -1955,11 +1955,11 @@ ; X64-AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX1-NEXT: notb %al ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX1-NEXT: vzeroupper ; X64-AVX1-NEXT: retq @@ -1971,11 +1971,11 @@ ; X64-AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; X64-AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; X64-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX2-NEXT: notb %al ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq @@ -1990,8 +1990,8 @@ ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0 
-; X64-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax +; X64-AVX512-NEXT: notb %al ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax ; X64-AVX512-NEXT: vzeroupper ; X64-AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll +++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll @@ -242,8 +242,8 @@ ; X86-SSE2-NEXT: psrld $16, %xmm1 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 -; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE2-NEXT: retl ; @@ -278,8 +278,8 @@ ; X64-SSE2-NEXT: psrld $16, %xmm1 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 -; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE2-NEXT: retq ; @@ -785,8 +785,8 @@ ; X86-SSE2-NEXT: psrld $16, %xmm1 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 -; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE2-NEXT: retl ; @@ -839,8 +839,8 @@ ; X64-SSE2-NEXT: psrld $16, %xmm1 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 -; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE2-NEXT: retq ; @@ -1592,8 +1592,8 @@ ; X86-SSE2-NEXT: psrld $16, %xmm1 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1 -; X86-SSE2-NEXT: pxor %xmm4, %xmm1 ; X86-SSE2-NEXT: movd %xmm1, %eax +; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-SSE2-NEXT: retl ; @@ -1656,8 +1656,8 @@ ; X64-SSE2-NEXT: psrld $16, %xmm1 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1 -; X64-SSE2-NEXT: pxor %xmm4, %xmm1 ; X64-SSE2-NEXT: movd %xmm1, %eax +; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax ; X64-SSE2-NEXT: retq ; Index: llvm/trunk/test/CodeGen/X86/known-bits-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/known-bits-vector.ll +++ llvm/trunk/test/CodeGen/X86/known-bits-vector.ll @@ -5,18 +5,14 @@ define i32 @knownbits_mask_extract_sext(<8 x i16> %a0) nounwind { ; X32-LABEL: knownbits_mask_extract_sext: ; X32: # %bb.0: -; X32-NEXT: movl $15, %eax -; X32-NEXT: vmovd %eax, %xmm1 -; X32-NEXT: vpand %xmm1, %xmm0, %xmm0 -; X32-NEXT: vpextrw $0, %xmm0, %eax +; X32-NEXT: vmovd %xmm0, %eax +; X32-NEXT: andl $15, %eax ; X32-NEXT: retl ; ; X64-LABEL: knownbits_mask_extract_sext: ; X64: # %bb.0: -; X64-NEXT: movl $15, %eax -; X64-NEXT: vmovd %eax, %xmm1 -; X64-NEXT: vpand %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpextrw $0, %xmm0, %eax +; X64-NEXT: vmovd %xmm0, %eax +; X64-NEXT: andl $15, %eax ; X64-NEXT: retq %1 = and <8 x i16> %a0, %2 = extractelement <8 x i16> %1, i32 0 @@ -38,8 +34,8 @@ ; ; X64-LABEL: knownbits_mask_extract_uitofp: ; X64: # %bb.0: -; X64-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; 
X64-NEXT: vmovq %xmm0, %rax +; X64-NEXT: movzwl %ax, %eax ; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X64-NEXT: retq %1 = and <2 x i64> %a0, Index: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll +++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll @@ -74,8 +74,8 @@ ; ; X64-LABEL: signbits_ashr_extract_sitofp_0: ; X64: # %bb.0: -; X64-NEXT: vpsrlq $32, %xmm0, %xmm0 ; X64-NEXT: vmovq %xmm0, %rax +; X64-NEXT: shrq $32, %rax ; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X64-NEXT: retq %1 = ashr <2 x i64> %a0, @@ -101,12 +101,9 @@ ; ; X64-LABEL: signbits_ashr_extract_sitofp_1: ; X64: # %bb.0: -; X64-NEXT: vpsrlq $32, %xmm0, %xmm0 -; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483648,1] -; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0 ; X64-NEXT: vmovq %xmm0, %rax -; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; X64-NEXT: shrq $32, %rax +; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X64-NEXT: retq %1 = ashr <2 x i64> %a0, %2 = extractelement <2 x i64> %1, i32 0 @@ -132,13 +129,10 @@ ; ; X64-LABEL: signbits_ashr_shl_extract_sitofp: ; X64: # %bb.0: -; X64-NEXT: vpsrlq $61, %xmm0, %xmm0 -; X64-NEXT: vmovdqa {{.*#+}} xmm1 = [4,8] -; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpsubq %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpsllq $20, %xmm0, %xmm0 ; X64-NEXT: vmovq %xmm0, %rax -; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; X64-NEXT: sarq $61, %rax +; X64-NEXT: shll $20, %eax +; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 ; X64-NEXT: retq %1 = ashr <2 x i64> %a0, %2 = shl <2 x i64> %1, @@ -168,10 +162,8 @@ ; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp: ; X64: # %bb.0: ; X64-NEXT: sarq $30, %rdi -; X64-NEXT: vmovq %rdi, %xmm0 -; X64-NEXT: vpsrlq $3, %xmm0, %xmm0 -; X64-NEXT: vmovq %xmm0, %rax -; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 +; X64-NEXT: shrq $3, %rdi +; X64-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 ; X64-NEXT: retq %1 = ashr i64 %a0, 30 %2 = insertelement <2 x i64> undef, i64 %1, i32 0 Index: llvm/trunk/test/CodeGen/X86/pr30511.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr30511.ll +++ llvm/trunk/test/CodeGen/X86/pr30511.ll @@ -9,7 +9,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0 -; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: mulsd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: movq %xmm0, %rax ; CHECK-NEXT: retq %1 = fadd <2 x double> %a, Index: llvm/trunk/test/CodeGen/X86/setcc-combine.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/setcc-combine.ll +++ llvm/trunk/test/CodeGen/X86/setcc-combine.ll @@ -6,18 +6,16 @@ ; SSE2-LABEL: test_eq_1: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: notl %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_eq_1: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %A, %B %sext = sext <4 x i1> %cmp to <4 x i32> @@ -65,18 +63,16 @@ ; SSE2-LABEL: test_ge_1: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, 
%xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: notl %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_ge_1: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: pextrd $1, %xmm1, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %A, %B %sext = sext <4 x i1> %cmp to <4 x i32> @@ -124,18 +120,16 @@ ; SSE2-LABEL: test_eq_2: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: notl %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_eq_2: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrd $1, %xmm1, %eax +; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %B, %A %sext = sext <4 x i1> %cmp to <4 x i32> @@ -170,18 +164,16 @@ ; SSE2-LABEL: test_le_2: ; SSE2: # %bb.0: ; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: notl %eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_le_2: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpgtd %xmm1, %xmm0 -; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm0, %xmm1 -; SSE41-NEXT: pextrd $1, %xmm1, %eax +; SSE41-NEXT: pextrd $1, %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: retq %cmp = icmp slt <4 x i32> %B, %A %sext = sext <4 x i1> %cmp to <4 x i32> Index: llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll +++ llvm/trunk/test/CodeGen/X86/shrink_vmul-widen.ll @@ -2096,7 +2096,7 @@ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movdqa (%eax), %xmm5 ; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movdqa (%ecx), %xmm3 +; X86-SSE-NEXT: movdqa (%ecx), %xmm2 ; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6 ; X86-SSE-NEXT: pxor %xmm0, %xmm0 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] @@ -2110,10 +2110,10 @@ ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1] -; X86-SSE-NEXT: movd %xmm2, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm6[2,3,0,1] -; X86-SSE-NEXT: movd %xmm2, %esi +; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1] +; X86-SSE-NEXT: movd %xmm3, %eax +; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1] +; X86-SSE-NEXT: movd %xmm3, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm7 @@ -2122,7 +2122,7 @@ ; X86-SSE-NEXT: movd %xmm6, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi -; X86-SSE-NEXT: movd %edx, %xmm2 +; X86-SSE-NEXT: movd %edx, %xmm3 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3] ; X86-SSE-NEXT: movd %xmm5, %eax ; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3] @@ -2130,60 +2130,57 @@ ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, 
%xmm5 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] +; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] ; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3] ; X86-SSE-NEXT: movd %xmm6, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3] ; X86-SSE-NEXT: movd %xmm6, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm6 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1] ; X86-SSE-NEXT: movd %xmm7, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1] ; X86-SSE-NEXT: movd %xmm7, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm7 ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] ; X86-SSE-NEXT: movd %xmm4, %eax -; X86-SSE-NEXT: movd %xmm3, %esi +; X86-SSE-NEXT: movd %xmm2, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3] ; X86-SSE-NEXT: movd %xmm4, %eax ; X86-SSE-NEXT: movd %edx, %xmm4 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3] -; X86-SSE-NEXT: movd %xmm3, %esi +; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] +; X86-SSE-NEXT: movd %xmm2, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi -; X86-SSE-NEXT: movd %edx, %xmm3 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; X86-SSE-NEXT: movd %edx, %xmm2 +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0] ; X86-SSE-NEXT: movd %xmm1, %eax -; X86-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm6[0,0] +; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0] ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199] ; X86-SSE-NEXT: pmuludq %xmm1, %xmm4 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] -; X86-SSE-NEXT: pmuludq %xmm1, %xmm3 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; X86-SSE-NEXT: pmuludq %xmm1, %xmm2 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] ; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0] +; X86-SSE-NEXT: pmuludq %xmm1, %xmm3 +; X86-SSE-NEXT: pmuludq %xmm1, %xmm5 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl 32(%ecx) -; X86-SSE-NEXT: pmuludq %xmm1, %xmm2 -; X86-SSE-NEXT: pmuludq %xmm1, %xmm5 -; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,2,2,3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007 -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: pmuludq %xmm0, %xmm2 -; X86-SSE-NEXT: movd %xmm2, (%eax) -; X86-SSE-NEXT: movdqa %xmm1, (%eax) +; X86-SSE-NEXT: movdqa %xmm0, (%eax) ; X86-SSE-NEXT: movdqa %xmm4, (%eax) +; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X86-SSE-NEXT: movl %eax, (%eax) ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: retl ; @@ -2252,14 +2249,13 @@ ; X86-AVX1-NEXT: vpinsrd $1, 
(%esp), %xmm1, %xmm1 # 4-byte Folded Reload ; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload ; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload -; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Folded Reload -; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8199,8199,8199,8199] -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1 +; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-AVX1-NEXT: # imm = 0x2007 +; X86-AVX1-NEXT: movl %eax, (%eax) +; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199] +; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1 -; X86-AVX1-NEXT: vmovd %xmm1, (%eax) ; X86-AVX1-NEXT: vmovaps %ymm0, (%eax) ; X86-AVX1-NEXT: addl $16, %esp ; X86-AVX1-NEXT: popl %esi @@ -2326,12 +2322,11 @@ ; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: xorl %edx, %edx ; X86-AVX2-NEXT: divl 32(%esi) -; X86-AVX2-NEXT: vmovd %edx, %xmm0 -; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199] -; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovd %xmm0, (%eax) -; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax) +; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199] +; X86-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X86-AVX2-NEXT: movl %eax, (%eax) +; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax) ; X86-AVX2-NEXT: popl %esi ; X86-AVX2-NEXT: popl %edi ; X86-AVX2-NEXT: vzeroupper @@ -2345,8 +2340,8 @@ ; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6 ; X64-SSE-NEXT: pxor %xmm0, %xmm0 ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-SSE-NEXT: movdqa %xmm5, %xmm4 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; X64-SSE-NEXT: movdqa %xmm5, %xmm3 +; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3] ; X64-SSE-NEXT: movd %xmm0, %eax @@ -2355,10 +2350,10 @@ ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm8 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1] -; X64-SSE-NEXT: movd %xmm3, %eax -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1] -; X64-SSE-NEXT: movd %xmm3, %ecx +; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1] +; X64-SSE-NEXT: movd %xmm4, %eax +; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1] +; X64-SSE-NEXT: movd %xmm4, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm7 @@ -2367,7 +2362,7 @@ ; X64-SSE-NEXT: movd %xmm6, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx -; X64-SSE-NEXT: movd %edx, %xmm3 +; X64-SSE-NEXT: movd %edx, %xmm4 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3] ; X64-SSE-NEXT: movd %xmm5, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3] @@ -2375,16 +2370,16 @@ ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm5 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = 
xmm3[0],xmm5[0],xmm3[1],xmm5[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3] ; X64-SSE-NEXT: movd %xmm6, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3] ; X64-SSE-NEXT: movd %xmm6, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm6 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1] ; X64-SSE-NEXT: movd %xmm7, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1] ; X64-SSE-NEXT: movd %xmm7, %ecx @@ -2392,13 +2387,13 @@ ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm7 ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] -; X64-SSE-NEXT: movd %xmm4, %eax +; X64-SSE-NEXT: movd %xmm3, %eax ; X64-SSE-NEXT: movd %xmm2, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3] -; X64-SSE-NEXT: movd %xmm4, %eax +; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3] +; X64-SSE-NEXT: movd %xmm3, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] ; X64-SSE-NEXT: movd %xmm2, %ecx ; X64-SSE-NEXT: xorl %edx, %edx @@ -2409,24 +2404,21 @@ ; X64-SSE-NEXT: movd %xmm1, %eax ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl 32(%rsi) -; X64-SSE-NEXT: movd %edx, %xmm1 -; X64-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8199,8199,8199,8199] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm0 +; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199] +; X64-SSE-NEXT: pmuludq %xmm1, %xmm0 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm2 +; X64-SSE-NEXT: pmuludq %xmm1, %xmm2 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm3 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] +; X64-SSE-NEXT: pmuludq %xmm1, %xmm4 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3] ; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm5 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007 -; X64-SSE-NEXT: movd %eax, %xmm3 -; X64-SSE-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE-NEXT: movd %xmm3, (%rax) +; X64-SSE-NEXT: pmuludq %xmm1, %xmm5 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X64-SSE-NEXT: movl %eax, (%rax) ; X64-SSE-NEXT: movdqa %xmm2, (%rax) ; X64-SSE-NEXT: movdqa %xmm0, (%rax) ; X64-SSE-NEXT: retq @@ -2493,11 +2485,10 @@ ; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2 ; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2 ; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2 -; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm2 -; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; X64-AVX1-NEXT: vmovd %r8d, %xmm2 ; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1 -; X64-AVX1-NEXT: vmovd %xmm1, (%rax) +; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007 +; X64-AVX1-NEXT: movl %eax, (%rax) ; 
X64-AVX1-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX1-NEXT: popq %rbx ; X64-AVX1-NEXT: popq %rbp @@ -2557,12 +2548,11 @@ ; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: xorl %edx, %edx ; X64-AVX2-NEXT: divl 32(%rsi) -; X64-AVX2-NEXT: vmovd %edx, %xmm0 -; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199] -; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovd %xmm0, (%rax) -; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax) +; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199] +; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0 +; X64-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X64-AVX2-NEXT: movl %eax, (%rax) +; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax) ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq %a0 = load <9 x i16>, <9 x i16>* %p0, align 64 Index: llvm/trunk/test/CodeGen/X86/shrink_vmul.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/shrink_vmul.ll +++ llvm/trunk/test/CodeGen/X86/shrink_vmul.ll @@ -2079,7 +2079,7 @@ ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movdqa (%eax), %xmm5 ; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movdqa (%ecx), %xmm3 +; X86-SSE-NEXT: movdqa (%ecx), %xmm2 ; X86-SSE-NEXT: movdqa 16(%ecx), %xmm6 ; X86-SSE-NEXT: pxor %xmm0, %xmm0 ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] @@ -2093,10 +2093,10 @@ ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[2,3,0,1] -; X86-SSE-NEXT: movd %xmm2, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm6[2,3,0,1] -; X86-SSE-NEXT: movd %xmm2, %esi +; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1] +; X86-SSE-NEXT: movd %xmm3, %eax +; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1] +; X86-SSE-NEXT: movd %xmm3, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm7 @@ -2105,7 +2105,7 @@ ; X86-SSE-NEXT: movd %xmm6, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi -; X86-SSE-NEXT: movd %edx, %xmm2 +; X86-SSE-NEXT: movd %edx, %xmm3 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3] ; X86-SSE-NEXT: movd %xmm5, %eax ; X86-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3] @@ -2113,60 +2113,57 @@ ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm5 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] +; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] ; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3] ; X86-SSE-NEXT: movd %xmm6, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3] ; X86-SSE-NEXT: movd %xmm6, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm6 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1] ; X86-SSE-NEXT: movd %xmm7, %eax -; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1] ; X86-SSE-NEXT: movd %xmm7, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: movd %edx, %xmm7 ; X86-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] ; X86-SSE-NEXT: movd %xmm4, %eax -; X86-SSE-NEXT: movd %xmm3, %esi +; 
X86-SSE-NEXT: movd %xmm2, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3] ; X86-SSE-NEXT: movd %xmm4, %eax ; X86-SSE-NEXT: movd %edx, %xmm4 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3] -; X86-SSE-NEXT: movd %xmm3, %esi +; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] +; X86-SSE-NEXT: movd %xmm2, %esi ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl %esi -; X86-SSE-NEXT: movd %edx, %xmm3 -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; X86-SSE-NEXT: movd %edx, %xmm2 +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] ; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0] ; X86-SSE-NEXT: movd %xmm1, %eax -; X86-SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm6[0,0] +; X86-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0] ; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199] ; X86-SSE-NEXT: pmuludq %xmm1, %xmm4 ; X86-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3] -; X86-SSE-NEXT: pmuludq %xmm1, %xmm3 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; X86-SSE-NEXT: pmuludq %xmm1, %xmm2 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] ; X86-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm0[0,0] +; X86-SSE-NEXT: pmuludq %xmm1, %xmm3 +; X86-SSE-NEXT: pmuludq %xmm1, %xmm5 +; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,2,2,3] +; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3] +; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; X86-SSE-NEXT: xorl %edx, %edx ; X86-SSE-NEXT: divl 32(%ecx) -; X86-SSE-NEXT: pmuludq %xmm1, %xmm2 -; X86-SSE-NEXT: pmuludq %xmm1, %xmm5 -; X86-SSE-NEXT: movd %edx, %xmm0 -; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm5[0,2,2,3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE-NEXT: movl $8199, %eax # imm = 0x2007 -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: pmuludq %xmm0, %xmm2 -; X86-SSE-NEXT: movd %xmm2, (%eax) -; X86-SSE-NEXT: movdqa %xmm1, (%eax) +; X86-SSE-NEXT: movdqa %xmm0, (%eax) ; X86-SSE-NEXT: movdqa %xmm4, (%eax) +; X86-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X86-SSE-NEXT: movl %eax, (%eax) ; X86-SSE-NEXT: popl %esi ; X86-SSE-NEXT: retl ; @@ -2235,14 +2232,13 @@ ; X86-AVX1-NEXT: vpinsrd $1, (%esp), %xmm1, %xmm1 # 4-byte Folded Reload ; X86-AVX1-NEXT: vpinsrd $2, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload ; X86-AVX1-NEXT: vpinsrd $3, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 4-byte Folded Reload -; X86-AVX1-NEXT: vmovd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Folded Reload -; X86-AVX1-NEXT: # xmm2 = mem[0],zero,zero,zero -; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [8199,8199,8199,8199] -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm1, %xmm1 +; X86-AVX1-NEXT: imull $8199, {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-AVX1-NEXT: # imm = 0x2007 +; X86-AVX1-NEXT: movl %eax, (%eax) +; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [8199,8199,8199,8199] +; X86-AVX1-NEXT: vpmulld %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpmulld %xmm2, %xmm1, %xmm1 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm2, %xmm1 -; X86-AVX1-NEXT: vmovd %xmm1, (%eax) ; X86-AVX1-NEXT: vmovaps %ymm0, (%eax) ; X86-AVX1-NEXT: addl $16, %esp ; X86-AVX1-NEXT: popl %esi @@ 
-2309,12 +2305,11 @@ ; X86-AVX2-NEXT: vmovd %xmm0, %eax ; X86-AVX2-NEXT: xorl %edx, %edx ; X86-AVX2-NEXT: divl 32(%esi) -; X86-AVX2-NEXT: vmovd %edx, %xmm0 -; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199] -; X86-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; X86-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0 -; X86-AVX2-NEXT: vmovd %xmm0, (%eax) -; X86-AVX2-NEXT: vmovdqa %ymm1, (%eax) +; X86-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199] +; X86-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X86-AVX2-NEXT: movl %eax, (%eax) +; X86-AVX2-NEXT: vmovdqa %ymm0, (%eax) ; X86-AVX2-NEXT: popl %esi ; X86-AVX2-NEXT: popl %edi ; X86-AVX2-NEXT: vzeroupper @@ -2328,8 +2323,8 @@ ; X64-SSE-NEXT: movdqa 16(%rsi), %xmm6 ; X64-SSE-NEXT: pxor %xmm0, %xmm0 ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-SSE-NEXT: movdqa %xmm5, %xmm4 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; X64-SSE-NEXT: movdqa %xmm5, %xmm3 +; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] ; X64-SSE-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm5[3,1,2,3] ; X64-SSE-NEXT: movd %xmm0, %eax @@ -2338,10 +2333,10 @@ ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm8 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[2,3,0,1] -; X64-SSE-NEXT: movd %xmm3, %eax -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm6[2,3,0,1] -; X64-SSE-NEXT: movd %xmm3, %ecx +; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1] +; X64-SSE-NEXT: movd %xmm4, %eax +; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm6[2,3,0,1] +; X64-SSE-NEXT: movd %xmm4, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm7 @@ -2350,7 +2345,7 @@ ; X64-SSE-NEXT: movd %xmm6, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx -; X64-SSE-NEXT: movd %edx, %xmm3 +; X64-SSE-NEXT: movd %edx, %xmm4 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,2,3] ; X64-SSE-NEXT: movd %xmm5, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,2,3] @@ -2358,16 +2353,16 @@ ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm5 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm4[3,1,2,3] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] +; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm3[3,1,2,3] ; X64-SSE-NEXT: movd %xmm6, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm2[3,1,2,3] ; X64-SSE-NEXT: movd %xmm6, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm6 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,3,0,1] +; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm3[2,3,0,1] ; X64-SSE-NEXT: movd %xmm7, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm7 = xmm2[2,3,0,1] ; X64-SSE-NEXT: movd %xmm7, %ecx @@ -2375,13 +2370,13 @@ ; X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm7 ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm6[0],xmm7[1],xmm6[1] -; X64-SSE-NEXT: movd %xmm4, %eax +; X64-SSE-NEXT: movd %xmm3, %eax ; X64-SSE-NEXT: movd %xmm2, %ecx ; X64-SSE-NEXT: xorl %edx, %edx ; 
X64-SSE-NEXT: divl %ecx ; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,2,3] -; X64-SSE-NEXT: movd %xmm4, %eax +; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3] +; X64-SSE-NEXT: movd %xmm3, %eax ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,2,3] ; X64-SSE-NEXT: movd %xmm2, %ecx ; X64-SSE-NEXT: xorl %edx, %edx @@ -2392,24 +2387,21 @@ ; X64-SSE-NEXT: movd %xmm1, %eax ; X64-SSE-NEXT: xorl %edx, %edx ; X64-SSE-NEXT: divl 32(%rsi) -; X64-SSE-NEXT: movd %edx, %xmm1 -; X64-SSE-NEXT: movdqa {{.*#+}} xmm4 = [8199,8199,8199,8199] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm0 +; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [8199,8199,8199,8199] +; X64-SSE-NEXT: pmuludq %xmm1, %xmm0 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; X64-SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm6[0,0] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm2 +; X64-SSE-NEXT: pmuludq %xmm1, %xmm2 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3] ; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm3 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] +; X64-SSE-NEXT: pmuludq %xmm1, %xmm4 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[0,2,2,3] ; X64-SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,0],xmm8[0,0] -; X64-SSE-NEXT: pmuludq %xmm4, %xmm5 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; X64-SSE-NEXT: movl $8199, %eax # imm = 0x2007 -; X64-SSE-NEXT: movd %eax, %xmm3 -; X64-SSE-NEXT: pmuludq %xmm1, %xmm3 -; X64-SSE-NEXT: movd %xmm3, (%rax) +; X64-SSE-NEXT: pmuludq %xmm1, %xmm5 +; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm5[0,2,2,3] +; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-SSE-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X64-SSE-NEXT: movl %eax, (%rax) ; X64-SSE-NEXT: movdqa %xmm2, (%rax) ; X64-SSE-NEXT: movdqa %xmm0, (%rax) ; X64-SSE-NEXT: retq @@ -2476,11 +2468,10 @@ ; X64-AVX1-NEXT: vpinsrd $1, %r11d, %xmm2, %xmm2 ; X64-AVX1-NEXT: vpinsrd $2, %r10d, %xmm2, %xmm2 ; X64-AVX1-NEXT: vpinsrd $3, %r9d, %xmm2, %xmm2 -; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm2 -; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 -; X64-AVX1-NEXT: vmovd %r8d, %xmm2 ; X64-AVX1-NEXT: vpmulld %xmm1, %xmm2, %xmm1 -; X64-AVX1-NEXT: vmovd %xmm1, (%rax) +; X64-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X64-AVX1-NEXT: imull $8199, %r8d, %eax # imm = 0x2007 +; X64-AVX1-NEXT: movl %eax, (%rax) ; X64-AVX1-NEXT: vmovaps %ymm0, (%rax) ; X64-AVX1-NEXT: popq %rbx ; X64-AVX1-NEXT: popq %rbp @@ -2540,12 +2531,11 @@ ; X64-AVX2-NEXT: vmovd %xmm0, %eax ; X64-AVX2-NEXT: xorl %edx, %edx ; X64-AVX2-NEXT: divl 32(%rsi) -; X64-AVX2-NEXT: vmovd %edx, %xmm0 -; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [8199,8199,8199,8199,8199,8199,8199,8199] -; X64-AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1 -; X64-AVX2-NEXT: vpmulld %xmm2, %xmm0, %xmm0 -; X64-AVX2-NEXT: vmovd %xmm0, (%rax) -; X64-AVX2-NEXT: vmovdqa %ymm1, (%rax) +; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8199,8199,8199,8199,8199,8199,8199,8199] +; X64-AVX2-NEXT: vpmulld %ymm0, %ymm1, %ymm0 +; X64-AVX2-NEXT: imull $8199, %edx, %eax # imm = 0x2007 +; X64-AVX2-NEXT: movl %eax, (%rax) +; X64-AVX2-NEXT: vmovdqa %ymm0, (%rax) ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq %a0 = load <9 x i16>, <9 x i16>* %p0, align 64 Index: llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll +++ 
llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll @@ -1294,12 +1294,13 @@ ; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] ; SSE2-NEXT: movdqa %xmm0, %xmm5 ; SSE2-NEXT: pslld %xmm4, %xmm5 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32] -; SSE2-NEXT: psubd %xmm2, %xmm4 -; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 -; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3] -; SSE2-NEXT: psrld %xmm3, %xmm1 +; SSE2-NEXT: movd %xmm2, %eax +; SSE2-NEXT: movl $32, %ecx +; SSE2-NEXT: subl %eax, %ecx +; SSE2-NEXT: movd %ecx, %xmm4 +; SSE2-NEXT: psrld %xmm4, %xmm1 ; SSE2-NEXT: por %xmm5, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 ; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: pandn %xmm1, %xmm2 ; SSE2-NEXT: por %xmm2, %xmm0 @@ -1471,12 +1472,13 @@ ; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] ; X32-SSE-NEXT: movdqa %xmm0, %xmm5 ; X32-SSE-NEXT: pslld %xmm4, %xmm5 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32] -; X32-SSE-NEXT: psubd %xmm2, %xmm4 -; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2 -; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3] -; X32-SSE-NEXT: psrld %xmm3, %xmm1 +; X32-SSE-NEXT: movd %xmm2, %eax +; X32-SSE-NEXT: movl $32, %ecx +; X32-SSE-NEXT: subl %eax, %ecx +; X32-SSE-NEXT: movd %ecx, %xmm4 +; X32-SSE-NEXT: psrld %xmm4, %xmm1 ; X32-SSE-NEXT: por %xmm5, %xmm1 +; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2 ; X32-SSE-NEXT: pand %xmm2, %xmm0 ; X32-SSE-NEXT: pandn %xmm1, %xmm2 ; X32-SSE-NEXT: por %xmm2, %xmm0 Index: llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll +++ llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -783,18 +783,16 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v4i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: xorps %xmm3, %xmm3 -; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3] -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: pslld %xmm3, %xmm4 -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32] -; SSE2-NEXT: psubd %xmm1, %xmm3 -; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3] -; SSE2-NEXT: psrld %xmm2, %xmm0 -; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: andl $31, %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pslld %xmm1, %xmm2 +; SSE2-NEXT: movl $32, %ecx +; SSE2-NEXT: subl %eax, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: psrld %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatvar_funnnel_v4i32: @@ -882,18 +880,16 @@ ; ; X32-SSE-LABEL: splatvar_funnnel_v4i32: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1 -; X32-SSE-NEXT: xorps %xmm2, %xmm2 -; X32-SSE-NEXT: xorps %xmm3, %xmm3 -; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3] -; X32-SSE-NEXT: movdqa %xmm0, %xmm4 -; X32-SSE-NEXT: pslld %xmm3, %xmm4 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32] -; X32-SSE-NEXT: psubd %xmm1, %xmm3 -; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3] -; X32-SSE-NEXT: psrld %xmm2, %xmm0 -; X32-SSE-NEXT: por %xmm4, %xmm0 +; X32-SSE-NEXT: movd %xmm1, %eax +; X32-SSE-NEXT: andl $31, %eax +; X32-SSE-NEXT: movd %eax, %xmm1 +; X32-SSE-NEXT: movdqa %xmm0, %xmm2 +; X32-SSE-NEXT: pslld %xmm1, %xmm2 +; X32-SSE-NEXT: movl $32, %ecx +; X32-SSE-NEXT: subl %eax, %ecx +; X32-SSE-NEXT: movd %ecx, %xmm1 
+; X32-SSE-NEXT: psrld %xmm1, %xmm0 +; X32-SSE-NEXT: por %xmm2, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat) Index: llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll +++ llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll @@ -1309,12 +1309,13 @@ ; SSE2-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] ; SSE2-NEXT: movdqa %xmm1, %xmm5 ; SSE2-NEXT: psrld %xmm4, %xmm5 -; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32] -; SSE2-NEXT: psubd %xmm2, %xmm4 -; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 -; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3] -; SSE2-NEXT: pslld %xmm3, %xmm0 +; SSE2-NEXT: movd %xmm2, %eax +; SSE2-NEXT: movl $32, %ecx +; SSE2-NEXT: subl %eax, %ecx +; SSE2-NEXT: movd %ecx, %xmm4 +; SSE2-NEXT: pslld %xmm4, %xmm0 ; SSE2-NEXT: por %xmm5, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm2 ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: pandn %xmm0, %xmm2 ; SSE2-NEXT: por %xmm1, %xmm2 @@ -1485,12 +1486,13 @@ ; X32-SSE-NEXT: movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3] ; X32-SSE-NEXT: movdqa %xmm1, %xmm5 ; X32-SSE-NEXT: psrld %xmm4, %xmm5 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm4 = [32,32,32,32] -; X32-SSE-NEXT: psubd %xmm2, %xmm4 -; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2 -; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3] -; X32-SSE-NEXT: pslld %xmm3, %xmm0 +; X32-SSE-NEXT: movd %xmm2, %eax +; X32-SSE-NEXT: movl $32, %ecx +; X32-SSE-NEXT: subl %eax, %ecx +; X32-SSE-NEXT: movd %ecx, %xmm4 +; X32-SSE-NEXT: pslld %xmm4, %xmm0 ; X32-SSE-NEXT: por %xmm5, %xmm0 +; X32-SSE-NEXT: pcmpeqd %xmm3, %xmm2 ; X32-SSE-NEXT: pand %xmm2, %xmm1 ; X32-SSE-NEXT: pandn %xmm0, %xmm2 ; X32-SSE-NEXT: por %xmm1, %xmm2 Index: llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll +++ llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -828,20 +828,17 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v4i32: ; SSE2: # %bb.0: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; SSE2-NEXT: xorps %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: psubd %xmm1, %xmm3 -; SSE2-NEXT: pand {{.*}}(%rip), %xmm3 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3] -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: pslld %xmm1, %xmm4 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32] -; SSE2-NEXT: psubd %xmm3, %xmm1 -; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] -; SSE2-NEXT: psrld %xmm2, %xmm0 -; SSE2-NEXT: por %xmm4, %xmm0 +; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: negl %eax +; SSE2-NEXT: andl $31, %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pslld %xmm1, %xmm2 +; SSE2-NEXT: movl $32, %ecx +; SSE2-NEXT: subl %eax, %ecx +; SSE2-NEXT: movd %ecx, %xmm1 +; SSE2-NEXT: psrld %xmm1, %xmm0 +; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatvar_funnnel_v4i32: @@ -939,20 +936,17 @@ ; ; X32-SSE-LABEL: splatvar_funnnel_v4i32: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] -; X32-SSE-NEXT: xorps %xmm2, %xmm2 -; X32-SSE-NEXT: pxor %xmm3, %xmm3 -; X32-SSE-NEXT: psubd %xmm1, %xmm3 -; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm3 -; X32-SSE-NEXT: pxor %xmm1, %xmm1 -; X32-SSE-NEXT: movss 
{{.*#+}} xmm1 = xmm3[0],xmm1[1,2,3] -; X32-SSE-NEXT: movdqa %xmm0, %xmm4 -; X32-SSE-NEXT: pslld %xmm1, %xmm4 -; X32-SSE-NEXT: movdqa {{.*#+}} xmm1 = [32,32,32,32] -; X32-SSE-NEXT: psubd %xmm3, %xmm1 -; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] -; X32-SSE-NEXT: psrld %xmm2, %xmm0 -; X32-SSE-NEXT: por %xmm4, %xmm0 +; X32-SSE-NEXT: movd %xmm1, %eax +; X32-SSE-NEXT: negl %eax +; X32-SSE-NEXT: andl $31, %eax +; X32-SSE-NEXT: movd %eax, %xmm1 +; X32-SSE-NEXT: movdqa %xmm0, %xmm2 +; X32-SSE-NEXT: pslld %xmm1, %xmm2 +; X32-SSE-NEXT: movl $32, %ecx +; X32-SSE-NEXT: subl %eax, %ecx +; X32-SSE-NEXT: movd %ecx, %xmm1 +; X32-SSE-NEXT: psrld %xmm1, %xmm0 +; X32-SSE-NEXT: por %xmm2, %xmm0 ; X32-SSE-NEXT: retl %splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat) Index: llvm/trunk/test/CodeGen/X86/vector-gep.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-gep.ll +++ llvm/trunk/test/CodeGen/X86/vector-gep.ll @@ -20,9 +20,8 @@ define i32 @AGEP1(<4 x i32*> %param) nounwind { ; CHECK-LABEL: AGEP1: ; CHECK: # %bb.0: -; CHECK-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 -; CHECK-NEXT: vpextrd $3, %xmm0, %eax -; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: vextractps $3, %xmm0, %eax +; CHECK-NEXT: movl 16(%eax), %eax ; CHECK-NEXT: retl %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %k = extractelement <4 x i32*> %A2, i32 3 Index: llvm/trunk/test/CodeGen/X86/vector-reduce-smax-widen.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-smax-widen.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-smax-widen.ll @@ -1120,31 +1120,28 @@ ; ; SSE41-LABEL: test_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0) @@ -1169,11 +1166,10 @@ ; SSE41-LABEL: test_v16i16: ; SSE41: # %bb.0: ; SSE41-NEXT: pmaxsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd 
%xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1181,11 +1177,10 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1194,11 +1189,10 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1207,11 +1201,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1241,11 +1234,10 @@ ; SSE41-NEXT: pmaxsw %xmm3, %xmm1 ; SSE41-NEXT: pmaxsw %xmm2, %xmm0 ; SSE41-NEXT: pmaxsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1256,11 +1248,10 @@ ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1270,11 +1261,10 @@ ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1285,11 +1275,10 @@ ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: 
vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1327,11 +1316,10 @@ ; SSE41-NEXT: pmaxsw %xmm4, %xmm0 ; SSE41-NEXT: pmaxsw %xmm2, %xmm0 ; SSE41-NEXT: pmaxsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1348,11 +1336,10 @@ ; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1364,11 +1351,10 @@ ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1380,11 +1366,10 @@ ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1597,38 +1582,35 @@ ; ; SSE41-LABEL: test_v16i8: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = 
[127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: xorb $127, %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0) @@ -1676,14 +1658,13 @@ ; SSE41-LABEL: test_v32i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1691,13 +1672,12 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1706,13 +1686,12 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1721,13 +1700,12 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa 
{{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1788,14 +1766,13 @@ ; SSE41-NEXT: pmaxsb %xmm3, %xmm1 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1806,13 +1783,12 @@ ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1822,13 +1798,12 @@ ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1839,13 +1814,12 @@ ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; 
AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1930,14 +1904,13 @@ ; SSE41-NEXT: pmaxsb %xmm4, %xmm0 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1954,13 +1927,12 @@ ; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1972,13 +1944,12 @@ ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1990,13 +1961,12 @@ ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll @@ -1340,31 +1340,28 @@ ; ; SSE41-LABEL: test_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; 
SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq %1 = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %a0) @@ -1389,11 +1386,10 @@ ; SSE41-LABEL: test_v16i16: ; SSE41: # %bb.0: ; SSE41-NEXT: pmaxsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1401,11 +1397,10 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1414,11 +1409,10 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1427,11 +1421,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1461,11 +1454,10 @@ ; SSE41-NEXT: pmaxsw %xmm3, %xmm1 ; SSE41-NEXT: pmaxsw %xmm2, %xmm0 ; SSE41-NEXT: pmaxsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: 
pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1476,11 +1468,10 @@ ; AVX1-NEXT: vpmaxsw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1490,11 +1481,10 @@ ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1505,11 +1495,10 @@ ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1547,11 +1536,10 @@ ; SSE41-NEXT: pmaxsw %xmm4, %xmm0 ; SSE41-NEXT: pmaxsw %xmm2, %xmm0 ; SSE41-NEXT: pmaxsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32767, %eax # imm = 0x7FFF ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1568,11 +1556,10 @@ ; AVX1-NEXT: vpmaxsw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1584,11 +1571,10 @@ ; AVX2-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; 
AVX2-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1600,11 +1586,10 @@ ; AVX512-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767,32767,32767,32767,32767,32767,32767] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32767, %eax # imm = 0x7FFF ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1947,38 +1932,35 @@ ; ; SSE41-LABEL: test_v16i8: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: xorb $127, %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %a0) @@ -2026,14 +2008,13 @@ ; SSE41-LABEL: test_v32i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2041,13 +2022,12 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, 
%ymm0, %xmm1 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2056,13 +2036,12 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2071,13 +2050,12 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2138,14 +2116,13 @@ ; SSE41-NEXT: pmaxsb %xmm3, %xmm1 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2156,13 +2133,12 @@ ; AVX1-NEXT: vpmaxsb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed 
$eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2172,13 +2148,12 @@ ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2189,13 +2164,12 @@ ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2280,14 +2254,13 @@ ; SSE41-NEXT: pmaxsb %xmm4, %xmm0 ; SSE41-NEXT: pmaxsb %xmm2, %xmm0 ; SSE41-NEXT: pmaxsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $127, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2304,13 +2277,12 @@ ; AVX1-NEXT: vpmaxsb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpmaxsb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $127, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2322,13 +2294,12 @@ ; AVX2-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; 
AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $127, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2340,13 +2311,12 @@ ; AVX512-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $127, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-reduce-smin-widen.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-smin-widen.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-smin-widen.ll @@ -1119,31 +1119,28 @@ ; ; SSE41-LABEL: test_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0) @@ -1168,11 +1165,10 @@ ; SSE41-LABEL: test_v16i16: ; SSE41: # %bb.0: ; SSE41-NEXT: pminsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1180,11 +1176,10 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, 
%xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1193,11 +1188,10 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1206,11 +1200,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1240,11 +1233,10 @@ ; SSE41-NEXT: pminsw %xmm3, %xmm1 ; SSE41-NEXT: pminsw %xmm2, %xmm0 ; SSE41-NEXT: pminsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1255,11 +1247,10 @@ ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1269,11 +1260,10 @@ ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1284,11 +1274,10 @@ ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; 
AVX512-NEXT: retq @@ -1326,11 +1315,10 @@ ; SSE41-NEXT: pminsw %xmm4, %xmm0 ; SSE41-NEXT: pminsw %xmm2, %xmm0 ; SSE41-NEXT: pminsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1347,11 +1335,10 @@ ; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1363,11 +1350,10 @@ ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1379,11 +1365,10 @@ ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1596,38 +1581,35 @@ ; ; SSE41-LABEL: test_v16i8: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: xorb $-128, %al ; AVX-NEXT: # kill: def $al killed $al killed $eax 
; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0) @@ -1675,14 +1657,13 @@ ; SSE41-LABEL: test_v32i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1690,13 +1671,12 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1705,13 +1685,12 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1720,13 +1699,12 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; 
AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1787,14 +1765,13 @@ ; SSE41-NEXT: pminsb %xmm3, %xmm1 ; SSE41-NEXT: pminsb %xmm2, %xmm0 ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1805,13 +1782,12 @@ ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1821,13 +1797,12 @@ ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1838,13 +1813,12 @@ ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1929,14 +1903,13 @@ ; SSE41-NEXT: pminsb %xmm4, %xmm0 ; SSE41-NEXT: pminsb %xmm2, %xmm0 ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; 
SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1953,13 +1926,12 @@ ; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1971,13 +1943,12 @@ ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1989,13 +1960,12 @@ ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll @@ -1339,31 +1339,28 @@ ; ; SSE41-LABEL: test_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; 
AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v8i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: retq %1 = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %a0) @@ -1388,11 +1385,10 @@ ; SSE41-LABEL: test_v16i16: ; SSE41: # %bb.0: ; SSE41-NEXT: pminsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1400,11 +1396,10 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1413,11 +1408,10 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1426,11 +1420,10 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1460,11 +1453,10 @@ ; SSE41-NEXT: pminsw %xmm3, %xmm1 ; SSE41-NEXT: pminsw %xmm2, %xmm0 ; SSE41-NEXT: pminsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1475,11 +1467,10 @@ ; AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = 
[32768,32768,32768,32768,32768,32768,32768,32768] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1489,11 +1480,10 @@ ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1504,11 +1494,10 @@ ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1546,11 +1535,10 @@ ; SSE41-NEXT: pminsw %xmm4, %xmm0 ; SSE41-NEXT: pminsw %xmm2, %xmm0 ; SSE41-NEXT: pminsw %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 ; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1567,11 +1555,10 @@ ; AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsw %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1583,11 +1570,10 @@ ; AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1599,11 +1585,10 @@ ; AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 
; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: xorl $32768, %eax # imm = 0x8000 ; AVX512-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1946,38 +1931,35 @@ ; ; SSE41-LABEL: test_v16i8: ; SSE41: # %bb.0: -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; ; AVX-LABEL: test_v16i8: ; AVX: # %bb.0: -; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: xorb $-128, %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; ; AVX512-LABEL: test_v16i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %a0) @@ -2025,14 +2007,13 @@ ; SSE41-LABEL: test_v32i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2040,13 +2021,12 @@ ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, 
%xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2055,13 +2035,12 @@ ; AVX2: # %bb.0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2070,13 +2049,12 @@ ; AVX512: # %bb.0: ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2137,14 +2115,13 @@ ; SSE41-NEXT: pminsb %xmm3, %xmm1 ; SSE41-NEXT: pminsb %xmm2, %xmm0 ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2155,13 +2132,12 @@ ; AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2171,13 +2147,12 @@ ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2188,13 +2163,12 @@ ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -2279,14 +2253,13 @@ ; SSE41-NEXT: pminsb %xmm4, %xmm0 ; SSE41-NEXT: pminsb %xmm2, %xmm0 ; SSE41-NEXT: pminsb %xmm1, %xmm0 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor {{.*}}(%rip), %xmm0 +; SSE41-NEXT: movdqa %xmm0, %xmm1 +; SSE41-NEXT: psrlw $8, %xmm1 +; SSE41-NEXT: pminub %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: xorb $-128, %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2303,13 +2276,12 @@ ; AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpminsb %xmm4, %xmm0, %xmm0 -; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: xorb $-128, %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2321,13 +2293,12 @@ ; AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: xorb $-128, %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2339,13 +2310,12 @@ ; AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = 
[128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX512-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpextrb $0, %xmm0, %eax +; AVX512-NEXT: xorb $-128, %al ; AVX512-NEXT: # kill: def $al killed $al killed $eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-reduce-umax-widen.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-umax-widen.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-umax-widen.ll @@ -1166,8 +1166,8 @@ ; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; @@ -1212,8 +1212,8 @@ ; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; @@ -1269,18 +1269,18 @@ ; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1289,8 +1289,8 @@ ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: notl %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; @@ -1299,8 +1299,8 @@ ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1309,8 +1309,8 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: retq %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0) @@ -1339,8 +1339,8 @@ ; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; @@ -1348,10 +1348,10 @@ ; SSE41: # 
%bb.0: ; SSE41-NEXT: pmaxuw %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1362,8 +1362,8 @@ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: notl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1375,8 +1375,8 @@ ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: notl %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1387,8 +1387,8 @@ ; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1399,8 +1399,8 @@ ; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1434,8 +1434,8 @@ ; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm1 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm4, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; @@ -1445,10 +1445,10 @@ ; SSE41-NEXT: pmaxuw %xmm2, %xmm0 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1462,8 +1462,8 @@ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: notl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1476,8 +1476,8 @@ ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: notl %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1490,8 +1490,8 @@ ; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax ; 
AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1504,8 +1504,8 @@ ; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1547,8 +1547,8 @@ ; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: pxor %xmm8, %xmm0 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm8, %xmm0 ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; @@ -1562,10 +1562,10 @@ ; SSE41-NEXT: pmaxuw %xmm2, %xmm0 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1585,8 +1585,8 @@ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: notl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1601,8 +1601,8 @@ ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: notl %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1616,8 +1616,8 @@ ; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1631,8 +1631,8 @@ ; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1806,13 +1806,13 @@ ; SSE41-LABEL: test_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pminub %xmm1, %xmm0 +; SSE41-NEXT: phminposuw %xmm0, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1820,11 +1820,11 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, 
%xmm0, %xmm0 ; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: notb %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -1835,8 +1835,8 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1847,8 +1847,8 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0) @@ -1877,13 +1877,13 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: pmaxub %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pminub %xmm1, %xmm0 +; SSE41-NEXT: phminposuw %xmm0, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1893,11 +1893,11 @@ ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: notb %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1908,11 +1908,11 @@ ; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: notb %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1925,8 +1925,8 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1939,8 +1939,8 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1974,13 +1974,13 @@ ; SSE41-NEXT: pmaxub %xmm2, %xmm0 ; SSE41-NEXT: pmaxub %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, 
%xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pminub %xmm1, %xmm0 +; SSE41-NEXT: phminposuw %xmm0, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -1993,11 +1993,11 @@ ; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: notb %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2009,11 +2009,11 @@ ; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: notb %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2028,8 +2028,8 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2044,8 +2044,8 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -2087,13 +2087,13 @@ ; SSE41-NEXT: pmaxub %xmm2, %xmm0 ; SSE41-NEXT: pmaxub %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pminub %xmm1, %xmm0 +; SSE41-NEXT: phminposuw %xmm0, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2112,11 +2112,11 @@ ; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: notb %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2130,11 +2130,11 @@ ; AVX2-NEXT: vpmaxub %xmm1, 
%xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: notb %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2150,8 +2150,8 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2167,8 +2167,8 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll +++ llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll @@ -1389,18 +1389,18 @@ ; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v8i16: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1409,8 +1409,8 @@ ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw %xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: notl %eax ; AVX-NEXT: # kill: def $ax killed $ax killed $eax ; AVX-NEXT: retq ; @@ -1419,8 +1419,8 @@ ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1429,8 +1429,8 @@ ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: retq %1 = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %a0) @@ -1459,8 +1459,8 @@ ; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; 
SSE2-NEXT: retq ; @@ -1468,10 +1468,10 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: pmaxuw %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1482,8 +1482,8 @@ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: notl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1495,8 +1495,8 @@ ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: notl %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1507,8 +1507,8 @@ ; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1519,8 +1519,8 @@ ; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1554,8 +1554,8 @@ ; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm1 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1 -; SSE2-NEXT: pxor %xmm4, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; @@ -1565,10 +1565,10 @@ ; SSE41-NEXT: pmaxuw %xmm2, %xmm0 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1582,8 +1582,8 @@ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: notl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1596,8 +1596,8 @@ ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: notl %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1610,8 +1610,8 @@ ; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # 
kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1624,8 +1624,8 @@ ; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -1667,8 +1667,8 @@ ; SSE2-NEXT: psrld $16, %xmm0 ; SSE2-NEXT: pxor %xmm8, %xmm0 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0 -; SSE2-NEXT: pxor %xmm8, %xmm0 ; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax ; SSE2-NEXT: retq ; @@ -1682,10 +1682,10 @@ ; SSE41-NEXT: pmaxuw %xmm2, %xmm0 ; SSE41-NEXT: pmaxuw %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: phminposuw %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: phminposuw %xmm1, %xmm0 ; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: notl %eax ; SSE41-NEXT: # kill: def $ax killed $ax killed $eax ; SSE41-NEXT: retq ; @@ -1705,8 +1705,8 @@ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: notl %eax ; AVX1-NEXT: # kill: def $ax killed $ax killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1721,8 +1721,8 @@ ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: notl %eax ; AVX2-NEXT: # kill: def $ax killed $ax killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1736,8 +1736,8 @@ ; AVX512BW-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax +; AVX512BW-NEXT: notl %eax ; AVX512BW-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1751,8 +1751,8 @@ ; AVX512VL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovd %xmm0, %eax +; AVX512VL-NEXT: notl %eax ; AVX512VL-NEXT: # kill: def $ax killed $ax killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -2052,13 +2052,13 @@ ; SSE41-LABEL: test_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pminub %xmm1, %xmm0 +; SSE41-NEXT: phminposuw %xmm0, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2066,11 +2066,11 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vphminposuw 
%xmm0, %xmm0 -; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpextrb $0, %xmm0, %eax +; AVX-NEXT: notb %al ; AVX-NEXT: # kill: def $al killed $al killed $eax ; AVX-NEXT: retq ; @@ -2081,8 +2081,8 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2093,8 +2093,8 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: retq %1 = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %a0) @@ -2123,13 +2123,13 @@ ; SSE41: # %bb.0: ; SSE41-NEXT: pmaxub %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE41-NEXT: pxor %xmm1, %xmm0 -; SSE41-NEXT: movdqa %xmm0, %xmm2 -; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pminub %xmm0, %xmm2 -; SSE41-NEXT: phminposuw %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pxor %xmm0, %xmm1 +; SSE41-NEXT: movdqa %xmm1, %xmm0 +; SSE41-NEXT: psrlw $8, %xmm0 +; SSE41-NEXT: pminub %xmm1, %xmm0 +; SSE41-NEXT: phminposuw %xmm0, %xmm0 ; SSE41-NEXT: pextrb $0, %xmm0, %eax +; SSE41-NEXT: notb %al ; SSE41-NEXT: # kill: def $al killed $al killed $eax ; SSE41-NEXT: retq ; @@ -2139,11 +2139,11 @@ ; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0 -; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax +; AVX1-NEXT: notb %al ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -2154,11 +2154,11 @@ ; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2 -; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 +; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax +; AVX2-NEXT: notb %al ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -2171,8 +2171,8 @@ ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax +; AVX512BW-NEXT: notb %al ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2185,8 +2185,8 @@ ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax +; AVX512VL-NEXT: notb %al ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -2220,13 +2220,13 @@ ; SSE41-NEXT: pmaxub %xmm2, %xmm0 ; SSE41-NEXT: pmaxub %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd 
%xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
 ; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
 ; SSE41-NEXT: retq
 ;
@@ -2239,11 +2239,11 @@
 ; AVX1-NEXT: vpmaxub %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -2255,11 +2255,11 @@
 ; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -2274,8 +2274,8 @@
 ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
@@ -2290,8 +2290,8 @@
 ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
 ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
@@ -2333,13 +2333,13 @@
 ; SSE41-NEXT: pmaxub %xmm2, %xmm0
 ; SSE41-NEXT: pmaxub %xmm1, %xmm0
 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE41-NEXT: pxor %xmm1, %xmm0
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psrlw $8, %xmm2
-; SSE41-NEXT: pminub %xmm0, %xmm2
-; SSE41-NEXT: phminposuw %xmm2, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psrlw $8, %xmm0
+; SSE41-NEXT: pminub %xmm1, %xmm0
+; SSE41-NEXT: phminposuw %xmm0, %xmm0
 ; SSE41-NEXT: pextrb $0, %xmm0, %eax
+; SSE41-NEXT: notb %al
 ; SSE41-NEXT: # kill: def $al killed $al killed $eax
 ; SSE41-NEXT: retq
 ;
@@ -2358,11 +2358,11 @@
 ; AVX1-NEXT: vpmaxub %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vphminposuw %xmm0, %xmm0
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT: vpextrb $0, %xmm0, %eax
+; AVX1-NEXT: notb %al
 ; AVX1-NEXT: # kill: def $al killed $al killed $eax
 ; AVX1-NEXT: vzeroupper
 ; AVX1-NEXT: retq
@@ -2376,11 +2376,11 @@
 ; AVX2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm2
-; AVX2-NEXT: vpminub %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vphminposuw %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vpextrb $0, %xmm0, %eax
+; AVX2-NEXT: notb %al
 ; AVX2-NEXT: # kill: def $al killed $al killed $eax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
@@ -2396,8 +2396,8 @@
 ; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512BW-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512BW-NEXT: notb %al
 ; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
@@ -2413,8 +2413,8 @@
 ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm1
 ; AVX512VL-NEXT: vpminub %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vphminposuw %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpextrb $0, %xmm0, %eax
+; AVX512VL-NEXT: notb %al
 ; AVX512VL-NEXT: # kill: def $al killed $al killed $eax
 ; AVX512VL-NEXT: vzeroupper
 ; AVX512VL-NEXT: retq
Index: llvm/trunk/test/CodeGen/X86/vector-reduce-umin-widen.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-umin-widen.ll
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-umin-widen.ll
@@ -1165,8 +1165,8 @@
 ; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1211,8 +1211,8 @@
 ; SSE2-NEXT: psrld $16, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1268,8 +1268,8 @@
 ; SSE2-NEXT: psrld $16, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1319,8 +1319,8 @@
 ; SSE2-NEXT: psrld $16, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1391,8 +1391,8 @@
 ; SSE2-NEXT: psrld $16, %xmm1
 ; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1479,8 +1479,8 @@
 ; SSE2-NEXT: psrld $16, %xmm0
 ; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
Index: llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll
@@ -1388,8 +1388,8 @@
 ; SSE2-NEXT: psrld $16, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1439,8 +1439,8 @@
 ; SSE2-NEXT: psrld $16, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1511,8 +1511,8 @@
 ; SSE2-NEXT: psrld $16, %xmm1
 ; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
@@ -1599,8 +1599,8 @@
 ; SSE2-NEXT: psrld $16, %xmm0
 ; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
 ; SSE2-NEXT: # kill: def $ax killed $ax killed $eax
 ; SSE2-NEXT: retq
 ;
Index: llvm/trunk/test/CodeGen/X86/vector-rotate-128.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-rotate-128.ll
+++ llvm/trunk/test/CodeGen/X86/vector-rotate-128.ll
@@ -738,18 +738,16 @@
 define <4 x i32> @splatvar_rotate_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; SSE2-LABEL: splatvar_rotate_v4i32:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
-; SSE2-NEXT: xorps %xmm2, %xmm2
-; SSE2-NEXT: xorps %xmm3, %xmm3
-; SSE2-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: pslld %xmm3, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
-; SSE2-NEXT: psubd %xmm1, %xmm3
-; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
-; SSE2-NEXT: psrld %xmm2, %xmm0
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movd %xmm1, %eax
+; SSE2-NEXT: andl $31, %eax
+; SSE2-NEXT: movd %eax, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pslld %xmm1, %xmm2
+; SSE2-NEXT: movl $32, %ecx
+; SSE2-NEXT: subl %eax, %ecx
+; SSE2-NEXT: movd %ecx, %xmm1
+; SSE2-NEXT: psrld %xmm1, %xmm0
+; SSE2-NEXT: por %xmm2, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: splatvar_rotate_v4i32:
@@ -837,18 +835,16 @@
 ;
 ; X32-SSE-LABEL: splatvar_rotate_v4i32:
 ; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: xorps %xmm2, %xmm2
-; X32-SSE-NEXT: xorps %xmm3, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; X32-SSE-NEXT: movdqa %xmm0, %xmm4
-; X32-SSE-NEXT: pslld %xmm3, %xmm4
-; X32-SSE-NEXT: movdqa {{.*#+}} xmm3 = [32,32,32,32]
-; X32-SSE-NEXT: psubd %xmm1, %xmm3
-; X32-SSE-NEXT: movss {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3]
-; X32-SSE-NEXT: psrld %xmm2, %xmm0
-; X32-SSE-NEXT: por %xmm4, %xmm0
+; X32-SSE-NEXT: movd %xmm1, %eax
+; X32-SSE-NEXT: andl $31, %eax
+; X32-SSE-NEXT: movd %eax, %xmm1
+; X32-SSE-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE-NEXT: pslld %xmm1, %xmm2
+; X32-SSE-NEXT: movl $32, %ecx
+; X32-SSE-NEXT: subl %eax, %ecx
+; X32-SSE-NEXT: movd %ecx, %xmm1
+; X32-SSE-NEXT: psrld %xmm1, %xmm0
+; X32-SSE-NEXT: por %xmm2, %xmm0
 ; X32-SSE-NEXT: retl
 %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
 %splat32 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %splat
Index: llvm/trunk/test/CodeGen/X86/xor.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/xor.ll
+++ llvm/trunk/test/CodeGen/X86/xor.ll
@@ -420,26 +420,14 @@
 ; X64-LIN-LABEL: PR17487:
 ; X64-LIN: # %bb.0:
 ; X64-LIN-NEXT: movd %edi, %xmm0
-; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X64-LIN-NEXT: pandn {{.*}}(%rip), %xmm0
-; X64-LIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-LIN-NEXT: movq %xmm0, %rcx
-; X64-LIN-NEXT: xorl %eax, %eax
-; X64-LIN-NEXT: cmpq $1, %rcx
-; X64-LIN-NEXT: setne %al
+; X64-LIN-NEXT: pextrw $0, %xmm0, %eax
+; X64-LIN-NEXT: andl $1, %eax
 ; X64-LIN-NEXT: retq
 ;
 ; X64-WIN-LABEL: PR17487:
 ; X64-WIN: # %bb.0:
+; X64-WIN-NEXT: andb $1, %cl
 ; X64-WIN-NEXT: movzbl %cl, %eax
-; X64-WIN-NEXT: movd %eax, %xmm0
-; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X64-WIN-NEXT: pandn __xmm@{{.*}}(%rip), %xmm0
-; X64-WIN-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; X64-WIN-NEXT: movq %xmm0, %rcx
-; X64-WIN-NEXT: xorl %eax, %eax
-; X64-WIN-NEXT: cmpq $1, %rcx
-; X64-WIN-NEXT: setne %al
 ; X64-WIN-NEXT: retq
 %tmp = insertelement <2 x i1> undef, i1 %tobool, i32 1
 %tmp1 = zext <2 x i1> %tmp to <2 x i64>