Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -16446,6 +16446,11 @@
   return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
 }
 
+// Sign extension of the low part of vector elements. This may be used either
+// when sign extend instructions are not available or if the vector element
+// sizes already match the sign-extended size. If the vector elements are in
+// their pre-extended size and sign extend instructions are available, that will
+// be handled by LowerSIGN_EXTEND.
 SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -16491,32 +16496,13 @@
   case MVT::v4i32:
   case MVT::v8i16: {
     SDValue Op0 = Op.getOperand(0);
-    SDValue Op00 = Op0.getOperand(0);
-    SDValue Tmp1;
-    // Hopefully, this VECTOR_SHUFFLE is just a VZEXT.
-    if (Op0.getOpcode() == ISD::BITCAST &&
-        Op00.getOpcode() == ISD::VECTOR_SHUFFLE) {
-      // (sext (vzext x)) -> (vsext x)
-      Tmp1 = LowerVectorIntExtend(Op00, Subtarget, DAG);
-      if (Tmp1.getNode()) {
-        EVT ExtraEltVT = ExtraVT.getVectorElementType();
-        // This folding is only valid when the in-reg type is a vector of i8,
-        // i16, or i32.
-        if (ExtraEltVT == MVT::i8 || ExtraEltVT == MVT::i16 ||
-            ExtraEltVT == MVT::i32) {
-          SDValue Tmp1Op0 = Tmp1.getOperand(0);
-          assert(Tmp1Op0.getOpcode() == X86ISD::VZEXT &&
-                 "This optimization is invalid without a VZEXT.");
-          return DAG.getNode(X86ISD::VSEXT, dl, VT, Tmp1Op0.getOperand(0));
-        }
-        Op0 = Tmp1;
-      }
-    }
-    // If the above didn't work, then just use Shift-Left + Shift-Right.
-    Tmp1 = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0, BitsDiff,
-                                      DAG);
-    return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Tmp1, BitsDiff,
+    // This is a sign extension of some low part of vector elements without
+    // changing the size of the vector elements themselves:
+    // Shift-Left + Shift-Right-Algebraic.
+    SDValue Shl = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Op0,
+                                             BitsDiff, DAG);
+    return getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Shl, BitsDiff,
                                       DAG);
   }
   }
 }
Index: llvm/trunk/test/CodeGen/X86/vec_trunc_sext.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_trunc_sext.ll
+++ llvm/trunk/test/CodeGen/X86/vec_trunc_sext.ll
@@ -0,0 +1,30 @@
+; RUN: llc %s -mtriple=x86_64-unknown-unknown -mattr='-sse4.1' -o - | FileCheck %s -check-prefix=NO_SSE_41
+; RUN: llc %s -mtriple=x86_64-unknown-unknown -mattr='+sse4.1' -o - | FileCheck %s -check-prefix=SSE_41
+
+; PR20472 ( http://llvm.org/bugs/show_bug.cgi?id=20472 )
+; When sexting a trunc'd vector value, we can't eliminate the zext.
+; If we don't have SSE4.1, use punpck.
+; If we have SSE4.1, use pmovzx because it combines the load op.
+; There may be a better way to do this using pshufb + pmovsx,
+; but that is beyond our current codegen capabilities.
+
+define <4 x i32> @trunc_sext(<4 x i16>* %in) {
+  %load = load <4 x i16>* %in
+  %trunc = trunc <4 x i16> %load to <4 x i8>
+  %sext = sext <4 x i8> %trunc to <4 x i32>
+  ret <4 x i32> %sext
+
+; NO_SSE_41-LABEL: trunc_sext:
+; NO_SSE_41: movq (%rdi), %xmm0
+; NO_SSE_41-NEXT: punpcklwd %xmm0, %xmm0
+; NO_SSE_41-NEXT: pslld $24, %xmm0
+; NO_SSE_41-NEXT: psrad $24, %xmm0
+; NO_SSE_41-NEXT: retq
+
+; SSE_41-LABEL: trunc_sext:
+; SSE_41: pmovzxwd (%rdi), %xmm0
+; SSE_41-NEXT: pslld $24, %xmm0
+; SSE_41-NEXT: psrad $24, %xmm0
+; SSE_41-NEXT: retq
+}
+
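
The punpcklwd / pmovzxwd in the CHECK lines widen each i16 into an i32 lane; the
pslld $24 / psrad $24 pair is the Shift-Left + Shift-Right-Algebraic lowering
from the first hunk, sign-extending the low 8 bits of each 32-bit lane in place
(BitsDiff = 32 - 8 = 24). Below is a minimal scalar sketch of that shift pair in
C++, assuming the usual arithmetic right shift of negative values
(implementation-defined in ISO C++, but true of every compiler LLVM targets);
the name SignExtendInReg8 is hypothetical, chosen only for illustration:

#include <cstdint>
#include <cstdio>

// Scalar model of one 32-bit lane: shift the low byte up to the sign-bit
// position, then shift it back down arithmetically so bit 7 is replicated
// into the upper 24 bits. BitsDiff = 32 - 8 = 24 here.
static int32_t SignExtendInReg8(int32_t Lane) {
  // The unsigned cast sidesteps signed-overflow UB on the left shift; the
  // right shift on int32_t is assumed arithmetic (see note above).
  return static_cast<int32_t>(static_cast<uint32_t>(Lane) << 24) >> 24;
}

int main() {
  const int32_t Lanes[4] = {0x7F, 0x80, 0xFF, 0x100};
  for (int32_t L : Lanes)
    std::printf("0x%X -> %d\n", static_cast<unsigned>(L), SignExtendInReg8(L));
  return 0;
}

On these sample lanes this prints 0x7F -> 127, 0x80 -> -128, 0xFF -> -1 and
0x100 -> 0: only the low byte of each lane survives, with its bit 7 replicated
into the upper 24 bits, which is exactly what sign_extend_inreg asks for.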