Index: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h @@ -379,6 +379,15 @@ /// operand, a ValueType node. SIGN_EXTEND_INREG, + /// ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an + /// in-register zero-extension of the low lanes of an integer vector. The + /// result type must have fewer elements than the operand type, and those + /// elements must be larger integer types such that the total size of the + /// operand type and the result type match. Each of the low operand + /// elements is zero-extended into the corresponding, wider result + /// elements. + ZERO_EXTEND_VECTOR_INREG, + /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned /// integer. FP_TO_SINT, Index: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h @@ -562,6 +562,12 @@ /// value assuming it was the smaller SrcTy value. SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy); + /// getZeroExtendVectorInReg - Return an operation which will zero extend the + /// low lanes of the operand into the specified vector type. For example, + /// this can convert a v16i8 into a v4i32 by zero extending the low four + /// lanes of the operand from i8 to i32. + SDValue getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT); + /// getBoolExtOrTrunc - Convert Op, which must be of integer type, to the /// integer type VT, by using an extension appropriate for the target's /// BooleanContent or truncating it. Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -649,6 +649,7 @@ SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_SETCC(SDNode* N); + SDValue WidenVecOp_ZERO_EXTEND(SDNode *N); SDValue WidenVecOp_Convert(SDNode *N); Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -75,6 +75,12 @@ /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and blends zeros into + /// the remaining lanes, finally bitcasting to the proper type. + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + /// \brief Expand bswap of vectors into a shuffle if legal. SDValue ExpandBSWAP(SDValue Op); @@ -274,6 +280,7 @@ case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -614,6 +621,8 @@ switch (Op->getOpcode()) { case ISD::SIGN_EXTEND_INREG: return ExpandSEXTINREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); case ISD::BSWAP: return ExpandBSWAP(Op); case ISD::VSELECT: @@ -708,6 +717,39 @@ return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +// Generically expand a vector zext in register to a shuffle of the relevant +// lanes into the appropriate locations, a blend of zero into the high bits, +// and a bitcast to the wider element type. +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build up a zero vector to blend into this one. + EVT SrcScalarVT = SrcVT.getScalarType(); + SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SmallVector BuildVectorOperands(NumSrcElements, ScalarZero); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + + // Shuffle the incoming lanes into the correct position, and pull all other + // lanes from the zero vector. + SmallVector ShuffleMask; + ShuffleMask.reserve(NumSrcElements); + for (int i = 0; i < NumSrcElements; ++i) + ShuffleMask.push_back(i); + + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); +} + SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { EVT VT = Op.getValueType(); Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2380,6 +2380,7 @@ case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::ZERO_EXTEND: Res = WidenVecOp_ZERO_EXTEND(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2388,7 +2389,6 @@ case ISD::UINT_TO_FP: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = WidenVecOp_Convert(N); break; @@ -2410,6 +2410,26 @@ return false; } +SDValue DAGTypeLegalizer::WidenVecOp_ZERO_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just zero-extending the low lanes is the + // correct transformation. + if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + EVT InVT = InOp.getValueType(); + assert(NumElts < InVT.getVectorNumElements() && "Input wasn't widened!"); + + // Use a special DAG node to represent the operation of zero extending the + // low lanes. + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1032,6 +1032,13 @@ getConstant(Imm, Op.getValueType())); } +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -221,6 +221,7 @@ case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -869,6 +869,7 @@ setOperationAction(ISD::TRUNCATE, VT, Expand); setOperationAction(ISD::SIGN_EXTEND, VT, Expand); setOperationAction(ISD::ZERO_EXTEND, VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ANY_EXTEND, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SELECT_CC, VT, Expand); Index: llvm/trunk/test/CodeGen/X86/widen_conversions.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/widen_conversions.ll +++ llvm/trunk/test/CodeGen/X86/widen_conversions.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mcpu=x86-64 -x86-experimental-vector-widening-legalization -x86-experimental-vector-shuffle-lowering | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +define <4 x i32> @zext_v4i8_to_v4i32(<4 x i8>* %ptr) { +; CHECK-LABEL: zext_v4i8_to_v4i32: +; +; CHECK: movd (%{{.*}}), %[[X:xmm[0-9]+]] +; CHECK-NEXT: pxor %[[Z:xmm[0-9]+]], %[[Z]] +; CHECK-NEXT: punpcklbw %[[Z]], %[[X]] +; CHECK-NEXT: punpcklbw %[[Z]], %[[X]] +; CHECK-NEXT: ret + + %val = load <4 x i8>* %ptr + %ext = zext <4 x i8> %val to <4 x i32> + ret <4 x i32> %ext +}