diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43168,18 +43168,20 @@
 /// Do target-specific dag combines on floating point negations.
 static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
+                           TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
   EVT OrigVT = N->getValueType(0);
   SDValue Arg = isFNEG(DAG, N);
   if (!Arg)
     return SDValue();
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = Arg.getValueType();
   EVT SVT = VT.getScalarType();
   SDLoc DL(N);
 
   // Let legalize expand this if it isn't a legal type yet.
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+  if (!TLI.isTypeLegal(VT))
     return SDValue();
 
   // If we're negating a FMUL node on a target with FMA, then we can avoid the
@@ -43193,26 +43195,12 @@
     return DAG.getBitcast(OrigVT, NewNode);
   }
 
-  // If we're negating an FMA node, then we can adjust the
-  // instruction to include the extra negation.
-  if (Arg.hasOneUse() && Subtarget.hasAnyFMA()) {
-    switch (Arg.getOpcode()) {
-    case ISD::FMA:
-    case X86ISD::FMSUB:
-    case X86ISD::FNMADD:
-    case X86ISD::FNMSUB:
-    case X86ISD::FMADD_RND:
-    case X86ISD::FMSUB_RND:
-    case X86ISD::FNMADD_RND:
-    case X86ISD::FNMSUB_RND: {
-      // We can't handle scalar intrinsic node here because it would only
-      // invert one element and not the whole vector. But we could try to handle
-      // a negation of the lower element only.
-      unsigned NewOpcode = negateFMAOpcode(Arg.getOpcode(), false, false, true);
-      return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT, Arg->ops()));
-    }
-    }
-  }
+  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+  bool LegalOperations = !DCI.isBeforeLegalizeOps();
+  if (TLI.getNegatibleCost(Arg, DAG, LegalOperations, CodeSize) !=
+      TargetLowering::NegatibleCost::Expensive)
+    return DAG.getBitcast(
+        OrigVT, TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize));
 
   return SDValue();
 }
@@ -43392,7 +43380,7 @@
   if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
     return FPLogic;
 
-  return combineFneg(N, DAG, Subtarget);
+  return combineFneg(N, DAG, DCI, Subtarget);
 }
 
 static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
@@ -43497,6 +43485,7 @@
 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
 static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
+                          TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
   assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
@@ -43508,7 +43497,7 @@
   if (isNullFPScalarOrVectorConst(N->getOperand(1)))
     return N->getOperand(0);
 
-  if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
+  if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget))
     return NewVal;
 
   return lowerX86FPLogicOp(N, DAG, Subtarget);
@@ -46672,14 +46661,14 @@
     return combineUIntToFP(N, DAG, Subtarget);
   case ISD::FADD:
   case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
-  case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
+  case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
   case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
   case X86ISD::VTRUNC: return combineVTRUNC(N, DAG);
   case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
   case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
   case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
   case X86ISD::FXOR:
-  case X86ISD::FOR: return combineFOr(N, DAG, Subtarget);
+  case X86ISD::FOR: return combineFOr(N, DAG, DCI, Subtarget);
   case X86ISD::FMIN:
   case X86ISD::FMAX: return combineFMinFMax(N, DAG);
   case ISD::FMINNUM:
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll
@@ -243,15 +243,10 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -285,15 +280,10 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -840,20 +830,12 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -903,20 +885,12 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
-; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1678,20 +1652,12 @@
 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm2
 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0
 ; X86-SSE2-NEXT: pmaxsw %xmm2, %xmm0
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm4, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm4, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1751,20 +1717,12 @@
 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm2
 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0
 ; X64-SSE2-NEXT: pmaxsw %xmm2, %xmm0
-; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm4, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm4, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -2034,15 +1992,10 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -2077,15 +2030,10 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -2154,15 +2102,10 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -2197,15 +2140,10 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
--- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll
@@ -245,15 +245,10 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -281,15 +276,10 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -780,20 +770,12 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -834,20 +816,12 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
-; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1582,20 +1556,12 @@
 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm2
 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0
 ; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
-; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm4, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm4, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1646,20 +1612,12 @@
 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm2
 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0
 ; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
-; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm4, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm4, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm4, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1898,15 +1856,10 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1935,15 +1888,10 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -1985,15 +1933,10 @@
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: pxor %xmm2, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: psrld $16, %xmm1
-; X86-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X86-SSE2-NEXT: movd %xmm1, %eax
 ; X86-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
@@ -2022,15 +1965,10 @@
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm0
 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
-; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
-; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X64-SSE2-NEXT: pxor %xmm2, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: psrld $16, %xmm1
-; X64-SSE2-NEXT: pxor %xmm2, %xmm1
 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
 ; X64-SSE2-NEXT: movd %xmm1, %eax
 ; X64-SSE2-NEXT: xorl $32768, %eax ## imm = 0x8000
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umax.ll b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-umax.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umax.ll
@@ -1208,9 +1208,7 @@
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1259,15 +1257,10 @@
 ; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1324,20 +1317,12 @@
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1419,20 +1404,12 @@
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm2
 ; SSE2-NEXT: pxor %xmm4, %xmm0
 ; SSE2-NEXT: pmaxsw %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1532,20 +1509,12 @@
 ; SSE2-NEXT: pmaxsw %xmm5, %xmm1
 ; SSE2-NEXT: pmaxsw %xmm4, %xmm1
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: pxor %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm8, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; SSE2-NEXT: pxor %xmm8, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT: pmaxsw %xmm0, %xmm1
 ; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: pmaxsw %xmm1, %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
diff --git a/llvm/test/CodeGen/X86/vector-reduce-umin.ll b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
--- a/llvm/test/CodeGen/X86/vector-reduce-umin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-umin.ll
@@ -1206,9 +1206,7 @@
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
 ; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1257,15 +1255,10 @@
 ; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1303,20 +1296,12 @@
 ; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pxor %xmm2, %xmm0
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1375,20 +1360,12 @@
 ; SSE2-NEXT: pminsw %xmm1, %xmm2
 ; SSE2-NEXT: pxor %xmm4, %xmm0
 ; SSE2-NEXT: pminsw %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: pxor %xmm4, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
 ; SSE2-NEXT: movd %xmm1, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
@@ -1463,20 +1440,12 @@
 ; SSE2-NEXT: pminsw %xmm5, %xmm1
 ; SSE2-NEXT: pminsw %xmm4, %xmm1
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: pxor %xmm8, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pxor %xmm8, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
-; SSE2-NEXT: pxor %xmm8, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; SSE2-NEXT: pminsw %xmm0, %xmm1
 ; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: psrld $16, %xmm0
-; SSE2-NEXT: pxor %xmm8, %xmm0
 ; SSE2-NEXT: pminsw %xmm1, %xmm0
 ; SSE2-NEXT: movd %xmm0, %eax
 ; SSE2-NEXT: xorl $32768, %eax # imm = 0x8000
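Note on the test deltas: SSE2 has no unsigned 16-bit min/max instructions, so these reductions work in the signed domain, flipping the sign bit of every lane (pxor with a 0x8000 splat) around each pminsw/pmaxsw. The X86 combiner treats an xor with a sign-mask splat as a negation (isFNEG), and with combineFneg now deferring to getNegatibleCost/getNegatedExpression, the back-to-back flips between reduction steps fold away, leaving one flip per input plus the final scalar xorl $32768. Below is a minimal scalar model of that cancellation; it is illustrative only, not LLVM code: flipSign, umax3_unfused and umax3_fused are hypothetical names, and the uint16_t to int16_t casts assume two's-complement wrap as on x86.

#include <algorithm>
#include <cassert>
#include <cstdint>

// One pxor lane: x ^ 0x8000 maps unsigned order onto signed order and back.
static uint16_t flipSign(uint16_t X) { return X ^ 0x8000u; }

// Old shape: every pmaxsw step flips its inputs in and its result out, so
// consecutive steps compute flipSign(flipSign(x)) on the value in the middle.
static uint16_t umax3_unfused(uint16_t A, uint16_t B, uint16_t C) {
  uint16_t M = flipSign(std::max((int16_t)flipSign(A), (int16_t)flipSign(B)));
  return flipSign(std::max((int16_t)flipSign(M), (int16_t)flipSign(C)));
}

// New shape: the interior flip pairs cancel, so each input is flipped once,
// the whole reduction runs on signed values, and one flip at the end (the
// xorl $32768 in the checks above) restores the unsigned result.
static uint16_t umax3_fused(uint16_t A, uint16_t B, uint16_t C) {
  int16_t M = std::max(std::max((int16_t)flipSign(A), (int16_t)flipSign(B)),
                       (int16_t)flipSign(C));
  return flipSign((uint16_t)M);
}

// Self-check of both shapes against a plain unsigned max over a sample grid.
int main() {
  for (uint32_t A = 0; A <= 0xFFFF; A += 251)
    for (uint32_t B = 0; B <= 0xFFFF; B += 509) {
      uint16_t C = (uint16_t)(31 * A + B);
      uint16_t Ref = std::max(std::max((uint16_t)A, (uint16_t)B), C);
      assert(umax3_unfused((uint16_t)A, (uint16_t)B, C) == Ref);
      assert(umax3_fused((uint16_t)A, (uint16_t)B, C) == Ref);
    }
  return 0;
}

The same identity with std::min in place of std::max models the pminsw changes in the umin tests.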