Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -270,6 +270,13 @@ int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const; + /// \brief Return true if the target works with masked instruction + /// AVX2 allows masks for consecutive load and store for i32 and i64 elements. + /// AVX-512 architecture will also allow masks for non-consecutive memory + /// accesses. + virtual bool isLegalPredicatedStore(Type *DataType, int Consecutive) const; + virtual bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const; + /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store /// of the specified type. Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -675,6 +675,9 @@ ATOMIC_LOAD_UMIN, ATOMIC_LOAD_UMAX, + // Masked load and store + MLOAD, MSTORE, + /// This corresponds to the llvm.lifetime.* intrinsics. The first operand /// is the chain and the second operand is the alloca pointer. LIFETIME_START, LIFETIME_END, Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -866,6 +866,10 @@ SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM); + SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue Src0, MachineMemOperand *MMO); + SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, SDValue Mask, MachineMemOperand *MMO); /// getSrcValue - Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); Index: include/llvm/CodeGen/SelectionDAGNodes.h =================================================================== --- include/llvm/CodeGen/SelectionDAGNodes.h +++ include/llvm/CodeGen/SelectionDAGNodes.h @@ -1177,6 +1177,8 @@ N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || N->getOpcode() == ISD::ATOMIC_LOAD || N->getOpcode() == ISD::ATOMIC_STORE || + N->getOpcode() == ISD::MLOAD || + N->getOpcode() == ISD::MSTORE || N->isMemIntrinsic() || N->isTargetMemoryOpcode(); } @@ -1926,6 +1928,72 @@ } }; +/// MaskedLoadStoreSDNode - This is a base class is used to represent MLOAD and +/// MSTORE nodes +/// +class MaskedLoadStoreSDNode : public MemSDNode { + // Operands + SDUse Ops[4]; +public: + friend class SelectionDAG; + MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl, + SDValue *Operands, unsigned numOperands, + SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + InitOperands(Ops, Operands, numOperands); + } + + // In the both nodes address is Op1, mask is Op2: + // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value + // MaskedStoreSDNode (Chain, ptr, mask, data) + // Mask is a vector of i1 elements + const SDValue &getBasePtr() const { return getOperand(1); } + const SDValue &getMask() const { return getOperand(2); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::MLOAD || + N->getOpcode() == ISD::MSTORE; + } +}; + +/// MaskedLoadSDNode - This class is used to represent an MLOAD node +/// +class MaskedLoadSDNode : public MaskedLoadStoreSDNode { +public: + friend class SelectionDAG; + MaskedLoadSDNode(unsigned Order, DebugLoc dl, + SDValue *Operands, unsigned numOperands, + SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands, + VTs, MemVT, MMO) + {} + + const SDValue &getSrc0() const { return getOperand(3); } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::MLOAD; + } +}; + +/// MaskedStoreSDNode - This class is used to represent an MSTORE node +/// +class MaskedStoreSDNode : public MaskedLoadStoreSDNode { + +public: + friend class SelectionDAG; + MaskedStoreSDNode(unsigned Order, DebugLoc dl, + SDValue *Operands, unsigned numOperands, + SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands, + VTs, MemVT, MMO) + {} + + const SDValue &getData() const { return getOperand(3); } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::MSTORE; + } +}; + /// MachineSDNode - An SDNode that represents everything that will be needed /// to construct a MachineInstr. These nodes are created during the /// instruction selection proper phase. Index: include/llvm/IR/IRBuilder.h =================================================================== --- include/llvm/IR/IRBuilder.h +++ include/llvm/IR/IRBuilder.h @@ -429,11 +429,22 @@ /// If the pointer isn't i8* it will be converted. CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = nullptr); + /// \brief Create a call to Masked Load intrinsic + CallInst *CreateMaskedLoad(ArrayRef Ops); + + /// \brief Create a call to Masked Store intrinsic + CallInst *CreateMaskedStore(ArrayRef Ops); + /// \brief Create an assume intrinsic call that allows the optimizer to /// assume that the provided condition will be true. CallInst *CreateAssumption(Value *Cond); private: + /// \brief Create a call to a masked intrinsic with given Id. + /// Masked intrinsic has only one overloaded type - data type. + CallInst *CreateMaskedIntrinsic(unsigned Id, ArrayRef Ops, + Type *DataTy); + Value *getCastedInt8PtrValue(Value *Ptr); }; Index: include/llvm/IR/Intrinsics.h =================================================================== --- include/llvm/IR/Intrinsics.h +++ include/llvm/IR/Intrinsics.h @@ -76,7 +76,8 @@ enum IITDescriptorKind { Void, VarArg, MMX, Metadata, Half, Float, Double, Integer, Vector, Pointer, Struct, - Argument, ExtendArgument, TruncArgument, HalfVecArgument + Argument, ExtendArgument, TruncArgument, HalfVecArgument, + SameVecWidthArgument } Kind; union { @@ -96,13 +97,15 @@ }; unsigned getArgumentNumber() const { assert(Kind == Argument || Kind == ExtendArgument || - Kind == TruncArgument || Kind == HalfVecArgument); + Kind == TruncArgument || Kind == HalfVecArgument || + Kind == SameVecWidthArgument); return Argument_Info >> 2; } ArgKind getArgumentKind() const { assert(Kind == Argument || Kind == ExtendArgument || - Kind == TruncArgument || Kind == HalfVecArgument); - return (ArgKind)(Argument_Info&3); + Kind == TruncArgument || Kind == HalfVecArgument || + Kind == SameVecWidthArgument); + return (ArgKind)(Argument_Info & 3); } static IITDescriptor get(IITDescriptorKind K, unsigned Field) { Index: include/llvm/IR/Intrinsics.td =================================================================== --- include/llvm/IR/Intrinsics.td +++ include/llvm/IR/Intrinsics.td @@ -112,6 +112,10 @@ // the intrinsic is overloaded, so the matched type should be declared as iAny. class LLVMExtendedType : LLVMMatchType; class LLVMTruncatedType : LLVMMatchType; +class LLVMVectorSameWidth + : LLVMMatchType { + ValueType ElTy = elty.VT; +} // Match the type of another intrinsic parameter that is expected to be a // vector type, but change the element count to be half as many @@ -539,6 +543,17 @@ def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [], "llvm.clear_cache">; +//===-------------------------- Masked Intrinsics -------------------------===// +// +def int_masked_store : Intrinsic<[], [llvm_ptr_ty, llvm_anyvector_ty, + llvm_i32_ty, + LLVMVectorSameWidth<0, llvm_i1_ty>], + [IntrReadWriteArgMem]>; + +def int_masked_load : Intrinsic<[llvm_anyvector_ty], + [llvm_ptr_ty, LLVMMatchType<0>, llvm_i32_ty, + LLVMVectorSameWidth<0, llvm_i1_ty>], + [IntrReadArgMem]>; //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -188,6 +188,14 @@ SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3> ]>; +def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store + SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2> +]>; + +def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3> +]>; + def SDTVecShuffle : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; @@ -454,6 +462,13 @@ def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + + + // Do not use ld, st directly. Use load, extload, sextload, zextload, store, // and truncst (see below). def ld : SDNode<"ISD::LOAD" , SDTLoad, Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -101,6 +101,17 @@ return PrevTTI->isLegalICmpImmediate(Imm); } +bool TargetTransformInfo::isLegalPredicatedLoad(Type *DataType, + int Consecutive) const { + return false; +} + +bool TargetTransformInfo::isLegalPredicatedStore(Type *DataType, + int Consecutive) const { + return false; +} + + bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -303,6 +303,8 @@ SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitMLOAD(SDNode *N); + SDValue visitMSTORE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -412,6 +414,7 @@ EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); } + int& MLD(); }; } @@ -1347,6 +1350,8 @@ case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSTORE: return visitMSTORE(N); } return SDValue(); } @@ -4768,6 +4773,162 @@ TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSTORE(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedStoreSDNode *MST = dyn_cast(N); + SDValue Mask = MST->getMask(); + SDValue Data = MST->getData(); + SDLoc DL(N); + + // If the MSTORE data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() == ISD::SETCC) { + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); + + SDValue Chain = MST->getChain(); + SDValue Ptr = MST->getBasePtr(); + + EVT MemoryVT = MST->getMemoryVT(); + unsigned Alignment = MST->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MST->getAAInfo(), MST->getRanges()); + + Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, MST->getAAInfo(), + MST->getRanges()); + + Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } + return SDValue(); +} + +SDValue DAGCombiner::visitMLOAD(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedLoadSDNode *MLD = dyn_cast(N); + SDValue Mask = MLD->getMask(); + SDLoc DL(N); + + // If the MLOAD result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Chain = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + EVT MemoryVT = MLD->getMemoryVT(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); + + SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { LoadRes, Chain }; + return DAG.getMergeValues(RetOps, DL); + } + return SDValue(); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -825,6 +825,10 @@ case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast(N), OpNo); break; + case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast(N), + OpNo); break; + case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -1091,6 +1095,25 @@ N->getMemoryVT(), N->getMemOperand()); } +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ + + assert(OpNo == 2 && "Only know how to promote the mask!"); + EVT DataVT = N->getOperand(3).getValueType(); + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ + assert(OpNo == 2 && "Only know how to promote the mask!"); + EVT DataVT = N->getValueType(0); + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -285,6 +285,8 @@ SDValue PromoteIntOp_TRUNCATE(SDNode *N); SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -578,6 +580,7 @@ void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -594,6 +597,7 @@ SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1,3127 +1,3241 @@ -//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file performs vector type splitting and scalarization for LegalizeTypes. -// Scalarization is the act of changing a computation in an illegal one-element -// vector type to be a computation in its scalar element type. For example, -// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed -// as a base case when scalarizing vector arithmetic like <4 x f32>, which -// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 -// types. -// Splitting is the act of changing a computation in an invalid vector type to -// be a computation in two vectors of half the size. For example, implementing -// <128 x f32> operations in terms of two <64 x f32> operations. -// -//===----------------------------------------------------------------------===// - -#include "LegalizeTypes.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -#define DEBUG_TYPE "legalize-types" - -//===----------------------------------------------------------------------===// -// Result Vector Scalarization: <1 x ty> -> ty. -//===----------------------------------------------------------------------===// - -void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); - SDValue R = SDValue(); - - switch (N->getOpcode()) { - default: -#ifndef NDEBUG - dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; -#endif - report_fatal_error("Do not know how to scalarize the result of this " - "operator!\n"); - - case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; - case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; - case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; - case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; - case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; - case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; - case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; - case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; - case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; - case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; - case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; - case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; - case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; - case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; - case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; - case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; - case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; - case ISD::ANY_EXTEND: - case ISD::BSWAP: - case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: - case ISD::CTPOP: - case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: - case ISD::FABS: - case ISD::FCEIL: - case ISD::FCOS: - case ISD::FEXP: - case ISD::FEXP2: - case ISD::FFLOOR: - case ISD::FLOG: - case ISD::FLOG10: - case ISD::FLOG2: - case ISD::FNEARBYINT: - case ISD::FNEG: - case ISD::FP_EXTEND: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::FRINT: - case ISD::FROUND: - case ISD::FSIN: - case ISD::FSQRT: - case ISD::FTRUNC: - case ISD::SIGN_EXTEND: - case ISD::SINT_TO_FP: - case ISD::TRUNCATE: - case ISD::UINT_TO_FP: - case ISD::ZERO_EXTEND: - R = ScalarizeVecRes_UnaryOp(N); - break; - - case ISD::ADD: - case ISD::AND: - case ISD::FADD: - case ISD::FCOPYSIGN: - case ISD::FDIV: - case ISD::FMUL: - case ISD::FMINNUM: - case ISD::FMAXNUM: - - case ISD::FPOW: - case ISD::FREM: - case ISD::FSUB: - case ISD::MUL: - case ISD::OR: - case ISD::SDIV: - case ISD::SREM: - case ISD::SUB: - case ISD::UDIV: - case ISD::UREM: - case ISD::XOR: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - R = ScalarizeVecRes_BinOp(N); - break; - case ISD::FMA: - R = ScalarizeVecRes_TernaryOp(N); - break; - } - - // If R is null, the sub-method took care of registering the result. - if (R.getNode()) - SetScalarizedVector(SDValue(N, ResNo), R); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue RHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { - SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - SDValue Op1 = GetScalarizedVector(N->getOperand(1)); - SDValue Op2 = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - Op0.getValueType(), Op0, Op1, Op2); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, - unsigned ResNo) { - SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); - return GetScalarizedVector(Op); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - NewVT, N->getOperand(0)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { - EVT EltVT = N->getValueType(0).getVectorElementType(); - SDValue InOp = N->getOperand(0); - // The BUILD_VECTOR operands may be of wider element types and - // we may need to truncate them back to the requested return type. - if (EltVT.isInteger()) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); - return InOp; -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - return DAG.getConvertRndSat(NewVT, SDLoc(N), - Op0, DAG.getValueType(NewVT), - DAG.getValueType(Op0.getValueType()), - N->getOperand(3), - N->getOperand(4), - cast(N)->getCvtCode()); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), - N->getValueType(0).getVectorElementType(), - N->getOperand(0), N->getOperand(1)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { - EVT NewVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), - NewVT, Op, N->getOperand(1)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { - SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FPOWI, SDLoc(N), - Op.getValueType(), Op, N->getOperand(1)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { - // The value to insert may have a wider type than the vector element type, - // so be sure to truncate it to the element type if necessary. - SDValue Op = N->getOperand(1); - EVT EltVT = N->getValueType(0).getVectorElementType(); - if (Op.getValueType() != EltVT) - // FIXME: Can this happen for floating point types? - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); - return Op; -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { - assert(N->isUnindexed() && "Indexed vector load?"); - - SDValue Result = DAG.getLoad(ISD::UNINDEXED, - N->getExtensionType(), - N->getValueType(0).getVectorElementType(), - SDLoc(N), - N->getChain(), N->getBasePtr(), - DAG.getUNDEF(N->getBasePtr().getValueType()), - N->getPointerInfo(), - N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getOriginalAlignment(), - N->getAAInfo()); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); - return Result; -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { - // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. - EVT DestVT = N->getValueType(0).getVectorElementType(); - SDValue Op = N->getOperand(0); - EVT OpVT = Op.getValueType(); - SDLoc DL(N); - // The result needs scalarizing, but it's not a given that the source does. - // This is a workaround for targets where it's impossible to scalarize the - // result of a conversion, because the source type is legal. - // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} - // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is - // legal and was not scalarized. - // See the similar logic in ScalarizeVecRes_VSETCC - if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { - Op = GetScalarizedVector(Op); - } else { - EVT VT = OpVT.getVectorElementType(); - Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, TLI.getVectorIdxTy())); - } - return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { - EVT EltVT = N->getValueType(0).getVectorElementType(); - EVT ExtVT = cast(N->getOperand(1))->getVT().getVectorElementType(); - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT, - LHS, DAG.getValueType(ExtVT)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { - // If the operand is wider than the vector element type then it is implicitly - // truncated. Make that explicit here. - EVT EltVT = N->getValueType(0).getVectorElementType(); - SDValue InOp = N->getOperand(0); - if (InOp.getValueType() != EltVT) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); - return InOp; -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { - SDValue Cond = GetScalarizedVector(N->getOperand(0)); - SDValue LHS = GetScalarizedVector(N->getOperand(1)); - TargetLowering::BooleanContent ScalarBool = - TLI.getBooleanContents(false, false); - TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); - - // If integer and float booleans have different contents then we can't - // reliably optimize in all cases. There is a full explanation for this in - // DAGCombiner::visitSELECT() where the same issue affects folding - // (select C, 0, 1) to (xor C, 1). - if (TLI.getBooleanContents(false, false) != - TLI.getBooleanContents(false, true)) { - // At least try the common case where the boolean is generated by a - // comparison. - if (Cond->getOpcode() == ISD::SETCC) { - EVT OpVT = Cond->getOperand(0)->getValueType(0); - ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); - VecBool = TLI.getBooleanContents(OpVT); - } else - ScalarBool = TargetLowering::UndefinedBooleanContent; - } - - if (ScalarBool != VecBool) { - EVT CondVT = Cond.getValueType(); - switch (ScalarBool) { - case TargetLowering::UndefinedBooleanContent: - break; - case TargetLowering::ZeroOrOneBooleanContent: - assert(VecBool == TargetLowering::UndefinedBooleanContent || - VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); - // Vector read from all ones, scalar expects a single 1 so mask. - Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, - Cond, DAG.getConstant(1, CondVT)); - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - assert(VecBool == TargetLowering::UndefinedBooleanContent || - VecBool == TargetLowering::ZeroOrOneBooleanContent); - // Vector reads from a one, scalar from all ones so sign extend. - Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, - Cond, DAG.getValueType(MVT::i1)); - break; - } - } - - return DAG.getSelect(SDLoc(N), - LHS.getValueType(), Cond, LHS, - GetScalarizedVector(N->getOperand(2))); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { - SDValue LHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getSelect(SDLoc(N), - LHS.getValueType(), N->getOperand(0), LHS, - GetScalarizedVector(N->getOperand(2))); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { - SDValue LHS = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), - N->getOperand(0), N->getOperand(1), - LHS, GetScalarizedVector(N->getOperand(3)), - N->getOperand(4)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { - assert(N->getValueType(0).isVector() == - N->getOperand(0).getValueType().isVector() && - "Scalar/Vector type mismatch"); - - if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N); - - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue RHS = GetScalarizedVector(N->getOperand(1)); - SDLoc DL(N); - - // Turn it into a scalar SETCC. - return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { - return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { - // Figure out if the scalar is the LHS or RHS and return it. - SDValue Arg = N->getOperand(2).getOperand(0); - if (Arg.getOpcode() == ISD::UNDEF) - return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); - unsigned Op = !cast(Arg)->isNullValue(); - return GetScalarizedVector(N->getOperand(Op)); -} - -SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { - assert(N->getValueType(0).isVector() && - N->getOperand(0).getValueType().isVector() && - "Operand types must be vectors"); - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - EVT OpVT = LHS.getValueType(); - EVT NVT = N->getValueType(0).getVectorElementType(); - SDLoc DL(N); - - // The result needs scalarizing, but it's not a given that the source does. - if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { - LHS = GetScalarizedVector(LHS); - RHS = GetScalarizedVector(RHS); - } else { - EVT VT = OpVT.getVectorElementType(); - LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); - RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, TLI.getVectorIdxTy())); - } - - // Turn it into a scalar SETCC. - SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, - N->getOperand(2)); - // Vectors may have a different boolean contents to scalars. Promote the - // value appropriately. - ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); - return DAG.getNode(ExtendCode, DL, NVT, Res); -} - - -//===----------------------------------------------------------------------===// -// Operand Vector Scalarization <1 x ty> -> ty. -//===----------------------------------------------------------------------===// - -bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); - SDValue Res = SDValue(); - - if (!Res.getNode()) { - switch (N->getOpcode()) { - default: -#ifndef NDEBUG - dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; -#endif - llvm_unreachable("Do not know how to scalarize this operator's operand!"); - case ISD::BITCAST: - Res = ScalarizeVecOp_BITCAST(N); - break; - case ISD::ANY_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::TRUNCATE: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - Res = ScalarizeVecOp_UnaryOp(N); - break; - case ISD::CONCAT_VECTORS: - Res = ScalarizeVecOp_CONCAT_VECTORS(N); - break; - case ISD::EXTRACT_VECTOR_ELT: - Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); - break; - case ISD::VSELECT: - Res = ScalarizeVecOp_VSELECT(N); - break; - case ISD::STORE: - Res = ScalarizeVecOp_STORE(cast(N), OpNo); - break; - case ISD::FP_ROUND: - Res = ScalarizeVecOp_FP_ROUND(N, OpNo); - break; - } - } - - // If the result is null, the sub-method took care of registering results etc. - if (!Res.getNode()) return false; - - // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. - if (Res.getNode() == N) - return true; - - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); - - ReplaceValueWith(SDValue(N, 0), Res); - return false; -} - -/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs -/// to be scalarized, it must be <1 x ty>. Convert the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { - SDValue Elt = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - N->getValueType(0), Elt); -} - -/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be -/// scalarized, it must be <1 x ty>. Do the operation on the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { - assert(N->getValueType(0).getVectorNumElements() == 1 && - "Unexpected vector type!"); - SDValue Elt = GetScalarizedVector(N->getOperand(0)); - SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), - N->getValueType(0).getScalarType(), Elt); - // Revectorize the result so the types line up with what the uses of this - // expression expect. - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); -} - -/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - -/// use a BUILD_VECTOR instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { - SmallVector Ops(N->getNumOperands()); - for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) - Ops[i] = GetScalarizedVector(N->getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); -} - -/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to -/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the -/// index. -SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - SDValue Res = GetScalarizedVector(N->getOperand(0)); - if (Res.getValueType() != N->getValueType(0)) - Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), - Res); - return Res; -} - - -/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be -/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT -/// (still with vector output type since that was acceptable if we got here). -SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { - SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); - EVT VT = N->getValueType(0); - - return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), - N->getOperand(2)); -} - -/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be -/// scalarized, it must be <1 x ty>. Just store the element. -SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ - assert(N->isUnindexed() && "Indexed store of one-element vector?"); - assert(OpNo == 1 && "Do not know how to scalarize this operand!"); - SDLoc dl(N); - - if (N->isTruncatingStore()) - return DAG.getTruncStore(N->getChain(), dl, - GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), N->getPointerInfo(), - N->getMemoryVT().getVectorElementType(), - N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getAAInfo()); - - return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), - N->getBasePtr(), N->getPointerInfo(), - N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment(), N->getAAInfo()); -} - -/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs -/// to be scalarized, it must be <1 x ty>. Convert the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { - SDValue Elt = GetScalarizedVector(N->getOperand(0)); - SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), - N->getValueType(0).getVectorElementType(), Elt, - N->getOperand(1)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); -} - -//===----------------------------------------------------------------------===// -// Result Vector Splitting -//===----------------------------------------------------------------------===// - -/// SplitVectorResult - This method is called when the specified result of the -/// specified node is found to need vector splitting. At this point, the node -/// may also have invalid operands or may have other results that need -/// legalization, we just know that (at least) one result needs vector -/// splitting. -void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Split node result: "; - N->dump(&DAG); - dbgs() << "\n"); - SDValue Lo, Hi; - - // See if the target wants to custom expand this node. - if (CustomLowerNode(N, N->getValueType(ResNo), true)) - return; - - switch (N->getOpcode()) { - default: -#ifndef NDEBUG - dbgs() << "SplitVectorResult #" << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; -#endif - report_fatal_error("Do not know how to split the result of this " - "operator!\n"); - - case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; - case ISD::VSELECT: - case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; - case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; - case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; - case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; - case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; - case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; - case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; - case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; - case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; - case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; - case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; - case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; - case ISD::LOAD: - SplitVecRes_LOAD(cast(N), Lo, Hi); - break; - case ISD::SETCC: - SplitVecRes_SETCC(N, Lo, Hi); - break; - case ISD::VECTOR_SHUFFLE: - SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); - break; - - case ISD::BSWAP: - case ISD::CONVERT_RNDSAT: - case ISD::CTLZ: - case ISD::CTTZ: - case ISD::CTLZ_ZERO_UNDEF: - case ISD::CTTZ_ZERO_UNDEF: - case ISD::CTPOP: - case ISD::FABS: - case ISD::FCEIL: - case ISD::FCOS: - case ISD::FEXP: - case ISD::FEXP2: - case ISD::FFLOOR: - case ISD::FLOG: - case ISD::FLOG10: - case ISD::FLOG2: - case ISD::FNEARBYINT: - case ISD::FNEG: - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::FRINT: - case ISD::FROUND: - case ISD::FSIN: - case ISD::FSQRT: - case ISD::FTRUNC: - case ISD::SINT_TO_FP: - case ISD::TRUNCATE: - case ISD::UINT_TO_FP: - SplitVecRes_UnaryOp(N, Lo, Hi); - break; - - case ISD::ANY_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - SplitVecRes_ExtendOp(N, Lo, Hi); - break; - - case ISD::ADD: - case ISD::SUB: - case ISD::MUL: - case ISD::FADD: - case ISD::FCOPYSIGN: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FMINNUM: - case ISD::FMAXNUM: - case ISD::SDIV: - case ISD::UDIV: - case ISD::FDIV: - case ISD::FPOW: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::UREM: - case ISD::SREM: - case ISD::FREM: - SplitVecRes_BinOp(N, Lo, Hi); - break; - case ISD::FMA: - SplitVecRes_TernaryOp(N, Lo, Hi); - break; - } - - // If Lo/Hi is null, the sub-method took care of registering results etc. - if (Lo.getNode()) - SetSplitVector(SDValue(N, ResNo), Lo, Hi); -} - -void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue LHSLo, LHSHi; - GetSplitVector(N->getOperand(0), LHSLo, LHSHi); - SDValue RHSLo, RHSHi; - GetSplitVector(N->getOperand(1), RHSLo, RHSHi); - SDLoc dl(N); - - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); -} - -void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue Op0Lo, Op0Hi; - GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); - SDValue Op1Lo, Op1Hi; - GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); - SDValue Op2Lo, Op2Hi; - GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); - SDLoc dl(N); - - Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), - Op0Lo, Op1Lo, Op2Lo); - Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), - Op0Hi, Op1Hi, Op2Hi); -} - -void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, - SDValue &Hi) { - // We know the result is a vector. The input may be either a vector or a - // scalar value. - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - SDLoc dl(N); - - SDValue InOp = N->getOperand(0); - EVT InVT = InOp.getValueType(); - - // Handle some special cases efficiently. - switch (getTypeAction(InVT)) { - case TargetLowering::TypeLegal: - case TargetLowering::TypePromoteInteger: - case TargetLowering::TypeSoftenFloat: - case TargetLowering::TypeScalarizeVector: - case TargetLowering::TypeWidenVector: - break; - case TargetLowering::TypeExpandInteger: - case TargetLowering::TypeExpandFloat: - // A scalar to vector conversion, where the scalar needs expansion. - // If the vector is being split in two then we can just convert the - // expanded pieces. - if (LoVT == HiVT) { - GetExpandedOp(InOp, Lo, Hi); - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); - return; - } - break; - case TargetLowering::TypeSplitVector: - // If the input is a vector that needs to be split, convert each split - // piece of the input now. - GetSplitVector(InOp, Lo, Hi); - Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); - return; - } - - // In the general case, convert the input to an integer and split it by hand. - EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); - EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); - if (TLI.isBigEndian()) - std::swap(LoIntVT, HiIntVT); - - SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); - - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); - Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); -} - -void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, - SDValue &Hi) { - EVT LoVT, HiVT; - SDLoc dl(N); - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - unsigned LoNumElts = LoVT.getVectorNumElements(); - SmallVector LoOps(N->op_begin(), N->op_begin()+LoNumElts); - Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps); - - SmallVector HiOps(N->op_begin()+LoNumElts, N->op_end()); - Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps); -} - -void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, - SDValue &Hi) { - assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); - SDLoc dl(N); - unsigned NumSubvectors = N->getNumOperands() / 2; - if (NumSubvectors == 1) { - Lo = N->getOperand(0); - Hi = N->getOperand(1); - return; - } - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - SmallVector LoOps(N->op_begin(), N->op_begin()+NumSubvectors); - Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); - - SmallVector HiOps(N->op_begin()+NumSubvectors, N->op_end()); - Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); -} - -void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue Vec = N->getOperand(0); - SDValue Idx = N->getOperand(1); - SDLoc dl(N); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); - uint64_t IdxVal = cast(Idx)->getZExtValue(); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), - TLI.getVectorIdxTy())); -} - -void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue Vec = N->getOperand(0); - SDValue SubVec = N->getOperand(1); - SDValue Idx = N->getOperand(2); - SDLoc dl(N); - GetSplitVector(Vec, Lo, Hi); - - // Spill the vector to the stack. - EVT VecVT = Vec.getValueType(); - EVT SubVecVT = VecVT.getVectorElementType(); - SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); - - // Store the new subvector into the specified index. - SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); - Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); - Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), - false, false, 0); - - // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, 0); - - // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; - StackPtr = - DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); - - // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, MinAlign(Alignment, IncrementSize)); -} - -void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDLoc dl(N); - GetSplitVector(N->getOperand(0), Lo, Hi); - Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); - Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); -} - -void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue LHSLo, LHSHi; - GetSplitVector(N->getOperand(0), LHSLo, LHSHi); - SDLoc dl(N); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = - DAG.GetSplitDestVTs(cast(N->getOperand(1))->getVT()); - - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, - DAG.getValueType(LoVT)); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, - DAG.getValueType(HiVT)); -} - -void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue Vec = N->getOperand(0); - SDValue Elt = N->getOperand(1); - SDValue Idx = N->getOperand(2); - SDLoc dl(N); - GetSplitVector(Vec, Lo, Hi); - - if (ConstantSDNode *CIdx = dyn_cast(Idx)) { - unsigned IdxVal = CIdx->getZExtValue(); - unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); - if (IdxVal < LoNumElts) - Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, - Lo.getValueType(), Lo, Elt, Idx); - else - Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, - TLI.getVectorIdxTy())); - return; - } - - // See if the target wants to custom expand this node. - if (CustomLowerNode(N, N->getValueType(0), true)) - return; - - // Spill the vector to the stack. - EVT VecVT = Vec.getValueType(); - EVT EltVT = VecVT.getVectorElementType(); - SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); - - // Store the new element. This may be larger than the vector element type, - // so use a truncating store. - SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = - TLI.getDataLayout()->getPrefTypeAlignment(VecType); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, - false, false, 0); - - // Load the Lo part from the stack slot. - Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, 0); - - // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, StackPtr.getValueType())); - - // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, false, MinAlign(Alignment, IncrementSize)); -} - -void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, - SDValue &Hi) { - EVT LoVT, HiVT; - SDLoc dl(N); - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); - Hi = DAG.getUNDEF(HiVT); -} - -void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, - SDValue &Hi) { - assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); - EVT LoVT, HiVT; - SDLoc dl(LD); - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); - - ISD::LoadExtType ExtType = LD->getExtensionType(); - SDValue Ch = LD->getChain(); - SDValue Ptr = LD->getBasePtr(); - SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); - EVT MemoryVT = LD->getMemoryVT(); - unsigned Alignment = LD->getOriginalAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); - AAMDNodes AAInfo = LD->getAAInfo(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment, AAInfo); - - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); - Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, - AAInfo); - - // Build a factor node to remember that this load is independent of the - // other one. - Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - ReplaceValueWith(SDValue(LD, 1), Ch); -} - -void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { - assert(N->getValueType(0).isVector() && - N->getOperand(0).getValueType().isVector() && - "Operand types must be vectors"); - - EVT LoVT, HiVT; - SDLoc DL(N); - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the input. - SDValue LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); -} - -void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, - SDValue &Hi) { - // Get the dest types - they may not match the input types, e.g. int_to_fp. - EVT LoVT, HiVT; - SDLoc dl(N); - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // If the input also splits, handle it directly for a compile time speedup. - // Otherwise split it by hand. - EVT InVT = N->getOperand(0).getValueType(); - if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) - GetSplitVector(N->getOperand(0), Lo, Hi); - else - std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); - - if (N->getOpcode() == ISD::FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); - } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { - SDValue DTyOpLo = DAG.getValueType(LoVT); - SDValue DTyOpHi = DAG.getValueType(HiVT); - SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); - SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - ISD::CvtCode CvtCode = cast(N)->getCvtCode(); - Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, - CvtCode); - Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, - CvtCode); - } else { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); - } -} - -void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDLoc dl(N); - EVT SrcVT = N->getOperand(0).getValueType(); - EVT DestVT = N->getValueType(0); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); - - // We can do better than a generic split operation if the extend is doing - // more than just doubling the width of the elements and the following are - // true: - // - The number of vector elements is even, - // - the source type is legal, - // - the type of a split source is illegal, - // - the type of an extended (by doubling element size) source is legal, and - // - the type of that extended source when split is legal. - // - // This won't necessarily completely legalize the operation, but it will - // more effectively move in the right direction and prevent falling down - // to scalarization in many cases due to the input vector being split too - // far. - unsigned NumElements = SrcVT.getVectorNumElements(); - if ((NumElements & 1) == 0 && - SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { - LLVMContext &Ctx = *DAG.getContext(); - EVT NewSrcVT = EVT::getVectorVT( - Ctx, EVT::getIntegerVT( - Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), - NumElements); - EVT SplitSrcVT = - EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); - EVT SplitLoVT, SplitHiVT; - std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); - if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && - TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { - DEBUG(dbgs() << "Split vector extend via incremental extend:"; - N->dump(&DAG); dbgs() << "\n"); - // Extend the source vector by one step. - SDValue NewSrc = - DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); - // Get the low and high halves of the new, extended one step, vector. - std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); - // Extend those vector halves the rest of the way. - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); - return; - } - } - // Fall back to the generic unary operator splitting otherwise. - SplitVecRes_UnaryOp(N, Lo, Hi); -} - -void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, - SDValue &Lo, SDValue &Hi) { - // The low and high parts of the original input give four input vectors. - SDValue Inputs[4]; - SDLoc dl(N); - GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); - GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); - EVT NewVT = Inputs[0].getValueType(); - unsigned NewElts = NewVT.getVectorNumElements(); - - // If Lo or Hi uses elements from at most two of the four input vectors, then - // express it as a vector shuffle of those two inputs. Otherwise extract the - // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. - SmallVector Ops; - for (unsigned High = 0; High < 2; ++High) { - SDValue &Output = High ? Hi : Lo; - - // Build a shuffle mask for the output, discovering on the fly which - // input vectors to use as shuffle operands (recorded in InputUsed). - // If building a suitable shuffle vector proves too hard, then bail - // out with useBuildVector set. - unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. - unsigned FirstMaskIdx = High * NewElts; - bool useBuildVector = false; - for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { - // The mask element. This indexes into the input. - int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); - - // The input vector this mask element indexes into. - unsigned Input = (unsigned)Idx / NewElts; - - if (Input >= array_lengthof(Inputs)) { - // The mask element does not index into any input vector. - Ops.push_back(-1); - continue; - } - - // Turn the index into an offset from the start of the input vector. - Idx -= Input * NewElts; - - // Find or create a shuffle vector operand to hold this input. - unsigned OpNo; - for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { - if (InputUsed[OpNo] == Input) { - // This input vector is already an operand. - break; - } else if (InputUsed[OpNo] == -1U) { - // Create a new operand for this input vector. - InputUsed[OpNo] = Input; - break; - } - } - - if (OpNo >= array_lengthof(InputUsed)) { - // More than two input vectors used! Give up on trying to create a - // shuffle vector. Insert all elements into a BUILD_VECTOR instead. - useBuildVector = true; - break; - } - - // Add the mask index for the new shuffle vector. - Ops.push_back(Idx + OpNo * NewElts); - } - - if (useBuildVector) { - EVT EltVT = NewVT.getVectorElementType(); - SmallVector SVOps; - - // Extract the input elements by hand. - for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { - // The mask element. This indexes into the input. - int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); - - // The input vector this mask element indexes into. - unsigned Input = (unsigned)Idx / NewElts; - - if (Input >= array_lengthof(Inputs)) { - // The mask element is "undef" or indexes off the end of the input. - SVOps.push_back(DAG.getUNDEF(EltVT)); - continue; - } - - // Turn the index into an offset from the start of the input vector. - Idx -= Input * NewElts; - - // Extract the vector element by hand. - SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getConstant(Idx, - TLI.getVectorIdxTy()))); - } - - // Construct the Lo/Hi output using a BUILD_VECTOR. - Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps); - } else if (InputUsed[0] == -1U) { - // No input vectors were used! The result is undefined. - Output = DAG.getUNDEF(NewVT); - } else { - SDValue Op0 = Inputs[InputUsed[0]]; - // If only one input was used, use an undefined vector for the other. - SDValue Op1 = InputUsed[1] == -1U ? - DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; - // At least one input vector was used. Create a new shuffle vector. - Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); - } - - Ops.clear(); - } -} - - -//===----------------------------------------------------------------------===// -// Operand Vector Splitting -//===----------------------------------------------------------------------===// - -/// SplitVectorOperand - This method is called when the specified operand of the -/// specified node is found to need vector splitting. At this point, all of the -/// result types of the node are known to be legal, but other operands of the -/// node may need legalization as well as the specified one. -bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Split node operand: "; - N->dump(&DAG); - dbgs() << "\n"); - SDValue Res = SDValue(); - - // See if the target wants to custom split this node. - if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) - return false; - - if (!Res.getNode()) { - switch (N->getOpcode()) { - default: -#ifndef NDEBUG - dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; -#endif - report_fatal_error("Do not know how to split this operator's " - "operand!\n"); - - case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; - case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; - case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; - case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; - case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; - case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; - case ISD::STORE: - Res = SplitVecOp_STORE(cast(N), OpNo); - break; - case ISD::VSELECT: - Res = SplitVecOp_VSELECT(N, OpNo); - break; - case ISD::CTTZ: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::FP_EXTEND: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - case ISD::FTRUNC: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - Res = SplitVecOp_UnaryOp(N); - break; - } - } - - // If the result is null, the sub-method took care of registering results etc. - if (!Res.getNode()) return false; - - // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. - if (Res.getNode() == N) - return true; - - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); - - ReplaceValueWith(SDValue(N, 0), Res); - return false; -} - -SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { - // The only possibility for an illegal operand is the mask, since result type - // legalization would have handled this node already otherwise. - assert(OpNo == 0 && "Illegal operand must be mask"); - - SDValue Mask = N->getOperand(0); - SDValue Src0 = N->getOperand(1); - SDValue Src1 = N->getOperand(2); - EVT Src0VT = Src0.getValueType(); - SDLoc DL(N); - assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); - - SDValue Lo, Hi; - GetSplitVector(N->getOperand(0), Lo, Hi); - assert(Lo.getValueType() == Hi.getValueType() && - "Lo and Hi have differing types"); - - EVT LoOpVT, HiOpVT; - std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); - assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); - - SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; - std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); - std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); - std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); - - SDValue LoSelect = - DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); - SDValue HiSelect = - DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); - - return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); -} - -SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { - // The result has a legal vector type, but the input needs splitting. - EVT ResVT = N->getValueType(0); - SDValue Lo, Hi; - SDLoc dl(N); - GetSplitVector(N->getOperand(0), Lo, Hi); - EVT InVT = Lo.getValueType(); - - EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); - - Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); -} - -SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { - // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will - // end up being split all the way down to individual components. Convert the - // split pieces into integers and reassemble. - SDValue Lo, Hi; - GetSplitVector(N->getOperand(0), Lo, Hi); - Lo = BitConvertToInteger(Lo); - Hi = BitConvertToInteger(Hi); - - if (TLI.isBigEndian()) - std::swap(Lo, Hi); - - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), - JoinIntegers(Lo, Hi)); -} - -SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { - // We know that the extracted result type is legal. - EVT SubVT = N->getValueType(0); - SDValue Idx = N->getOperand(1); - SDLoc dl(N); - SDValue Lo, Hi; - GetSplitVector(N->getOperand(0), Lo, Hi); - - uint64_t LoElts = Lo.getValueType().getVectorNumElements(); - uint64_t IdxVal = cast(Idx)->getZExtValue(); - - if (IdxVal < LoElts) { - assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && - "Extracted subvector crosses vector split!"); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); - } else { - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); - } -} - -SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - SDValue Vec = N->getOperand(0); - SDValue Idx = N->getOperand(1); - EVT VecVT = Vec.getValueType(); - - if (isa(Idx)) { - uint64_t IdxVal = cast(Idx)->getZExtValue(); - assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); - - SDValue Lo, Hi; - GetSplitVector(Vec, Lo, Hi); - - uint64_t LoElts = Lo.getValueType().getVectorNumElements(); - - if (IdxVal < LoElts) - return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); - return SDValue(DAG.UpdateNodeOperands(N, Hi, - DAG.getConstant(IdxVal - LoElts, - Idx.getValueType())), 0); - } - - // See if the target wants to custom expand this node. - if (CustomLowerNode(N, N->getValueType(0), true)) - return SDValue(); - - // Store the vector to the stack. - EVT EltVT = VecVT.getVectorElementType(); - SDLoc dl(N); - SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, - MachinePointerInfo(), false, false, 0); - - // Load back the required element. - StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT, false, false, false, 0); -} - -SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { - assert(N->isUnindexed() && "Indexed store of vector?"); - assert(OpNo == 1 && "Can only split the stored value"); - SDLoc DL(N); - - bool isTruncating = N->isTruncatingStore(); - SDValue Ch = N->getChain(); - SDValue Ptr = N->getBasePtr(); - EVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getOriginalAlignment(); - bool isVol = N->isVolatile(); - bool isNT = N->isNonTemporal(); - AAMDNodes AAInfo = N->getAAInfo(); - SDValue Lo, Hi; - GetSplitVector(N->getOperand(1), Lo, Hi); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - - if (isTruncating) - Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment, AAInfo); - else - Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment, AAInfo); - - // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, Ptr.getValueType())); - - if (isTruncating) - Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment, AAInfo); - else - Hi = DAG.getStore(Ch, DL, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment, AAInfo); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); -} - -SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { - SDLoc DL(N); - - // The input operands all must have the same type, and we know the result - // type is valid. Convert this to a buildvector which extracts all the - // input elements. - // TODO: If the input elements are power-two vectors, we could convert this to - // a new CONCAT_VECTORS node with elements that are half-wide. - SmallVector Elts; - EVT EltVT = N->getValueType(0).getVectorElementType(); - for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { - SDValue Op = N->getOperand(op); - for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); - i != e; ++i) { - Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); - - } - } - - return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); -} - -SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { - // The result type is legal, but the input type is illegal. If splitting - // ends up with the result type of each half still being legal, just - // do that. If, however, that would result in an illegal result type, - // we can try to get more clever with power-two vectors. Specifically, - // split the input type, but also widen the result element size, then - // concatenate the halves and truncate again. For example, consider a target - // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit - // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: - // %inlo = v4i32 extract_subvector %in, 0 - // %inhi = v4i32 extract_subvector %in, 4 - // %lo16 = v4i16 trunc v4i32 %inlo - // %hi16 = v4i16 trunc v4i32 %inhi - // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 - // %res = v8i8 trunc v8i16 %in16 - // - // Without this transform, the original truncate would end up being - // scalarized, which is pretty much always a last resort. - SDValue InVec = N->getOperand(0); - EVT InVT = InVec->getValueType(0); - EVT OutVT = N->getValueType(0); - unsigned NumElements = OutVT.getVectorNumElements(); - // Widening should have already made sure this is a power-two vector - // if we're trying to split it at all. assert() that's true, just in case. - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - - unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); - unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); - - // If the input elements are only 1/2 the width of the result elements, - // just use the normal splitting. Our trick only work if there's room - // to split more than once. - if (InElementSize <= OutElementSize * 2) - return SplitVecOp_UnaryOp(N); - SDLoc DL(N); - - // Extract the halves of the input via extract_subvector. - SDValue InLoVec, InHiVec; - std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); - // Truncate them to 1/2 the element size. - EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); - EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, - NumElements/2); - SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); - SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); - // Concatenate them to get the full intermediate truncation result. - EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); - SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, - HalfHi); - // Now finish up by truncating all the way down to the original result - // type. This should normally be something that ends up being legal directly, - // but in theory if a target has very wide vectors and an annoyingly - // restricted set of legal types, this split can chain to build things up. - return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); -} - -SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { - assert(N->getValueType(0).isVector() && - N->getOperand(0).getValueType().isVector() && - "Operand types must be vectors"); - // The result has a legal vector type, but the input needs splitting. - SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; - SDLoc DL(N); - GetSplitVector(N->getOperand(0), Lo0, Hi0); - GetSplitVector(N->getOperand(1), Lo1, Hi1); - unsigned PartElements = Lo0.getValueType().getVectorNumElements(); - EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); - EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); - - LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); - HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); - SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); - return PromoteTargetBoolean(Con, N->getValueType(0)); -} - - -SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { - // The result has a legal vector type, but the input needs splitting. - EVT ResVT = N->getValueType(0); - SDValue Lo, Hi; - SDLoc DL(N); - GetSplitVector(N->getOperand(0), Lo, Hi); - EVT InVT = Lo.getValueType(); - - EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); - - Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); - Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); - - return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); -} - - - -//===----------------------------------------------------------------------===// -// Result Vector Widening -//===----------------------------------------------------------------------===// - -void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node result " << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); - - // See if the target wants to custom widen this node. - if (CustomWidenLowerNode(N, N->getValueType(ResNo))) - return; - - SDValue Res = SDValue(); - switch (N->getOpcode()) { - default: -#ifndef NDEBUG - dbgs() << "WidenVectorResult #" << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; -#endif - llvm_unreachable("Do not know how to widen the result of this operator!"); - - case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; - case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; - case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; - case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; - case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; - case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; - case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; - case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; - case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; - case ISD::VSELECT: - case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; - case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; - case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; - case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; - case ISD::VECTOR_SHUFFLE: - Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); - break; - - case ISD::ADD: - case ISD::AND: - case ISD::MUL: - case ISD::MULHS: - case ISD::MULHU: - case ISD::OR: - case ISD::SUB: - case ISD::XOR: - case ISD::FMINNUM: - case ISD::FMAXNUM: - Res = WidenVecRes_Binary(N); - break; - - case ISD::FADD: - case ISD::FCOPYSIGN: - case ISD::FMUL: - case ISD::FPOW: - case ISD::FSUB: - case ISD::FDIV: - case ISD::FREM: - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: - Res = WidenVecRes_BinaryCanTrap(N); - break; - - case ISD::FPOWI: - Res = WidenVecRes_POWI(N); - break; - - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - Res = WidenVecRes_Shift(N); - break; - - case ISD::ANY_EXTEND: - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::SIGN_EXTEND: - case ISD::SINT_TO_FP: - case ISD::TRUNCATE: - case ISD::UINT_TO_FP: - case ISD::ZERO_EXTEND: - Res = WidenVecRes_Convert(N); - break; - - case ISD::BSWAP: - case ISD::CTLZ: - case ISD::CTPOP: - case ISD::CTTZ: - case ISD::FABS: - case ISD::FCEIL: - case ISD::FCOS: - case ISD::FEXP: - case ISD::FEXP2: - case ISD::FFLOOR: - case ISD::FLOG: - case ISD::FLOG10: - case ISD::FLOG2: - case ISD::FNEARBYINT: - case ISD::FNEG: - case ISD::FRINT: - case ISD::FROUND: - case ISD::FSIN: - case ISD::FSQRT: - case ISD::FTRUNC: - Res = WidenVecRes_Unary(N); - break; - case ISD::FMA: - Res = WidenVecRes_Ternary(N); - break; - } - - // If Res is null, the sub-method took care of registering the result. - if (Res.getNode()) - SetWidenedVector(SDValue(N, ResNo), Res); -} - -SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { - // Ternary op widening. - SDLoc dl(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - SDValue InOp3 = GetWidenedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); -} - -SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { - // Binary op widening. - SDLoc dl(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); -} - -SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { - // Binary op widening for operations that can trap. - unsigned Opcode = N->getOpcode(); - SDLoc dl(N); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - EVT WidenEltVT = WidenVT.getVectorElementType(); - EVT VT = WidenVT; - unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeLegal(VT) && NumElts != 1) { - NumElts = NumElts / 2; - VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } - - if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { - // Operation doesn't trap so just widen as normal. - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); - } - - // No legal vector version so unroll the vector operation and then widen. - if (NumElts == 1) - return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); - - // Since the operation can trap, apply operation on the original vector. - EVT MaxVT = VT; - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); - - SmallVector ConcatOps(CurNumElts); - unsigned ConcatEnd = 0; // Current ConcatOps index. - int Idx = 0; // Current Idx into input vectors. - - // NumElts := greatest legal vector size (at most WidenVT) - // while (orig. vector has unhandled elements) { - // take munches of size NumElts from the beginning and add to ConcatOps - // NumElts := next smaller supported vector size or 1 - // } - while (CurNumElts != 0) { - while (CurNumElts >= NumElts) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); - Idx += NumElts; - CurNumElts -= NumElts; - } - do { - NumElts = NumElts / 2; - VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeLegal(VT) && NumElts != 1); - - if (NumElts == 1) { - for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); - SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getConstant(Idx, - TLI.getVectorIdxTy())); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); - } - CurNumElts = 0; - } - } - - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; - } - - // while (Some element of ConcatOps is not of type MaxVT) { - // From the end of ConcatOps, collect elements of the same type and put - // them into an op of the next larger supported type - // } - while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { - Idx = ConcatEnd - 1; - VT = ConcatOps[Idx--].getValueType(); - while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) - Idx--; - - int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; - EVT NextVT; - do { - NextSize *= 2; - NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); - } while (!TLI.isTypeLegal(NextVT)); - - if (!VT.isVector()) { - // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT - SDValue VecOp = DAG.getUNDEF(NextVT); - unsigned NumToInsert = ConcatEnd - Idx - 1; - for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getConstant(i, - TLI.getVectorIdxTy())); - } - ConcatOps[Idx+1] = VecOp; - ConcatEnd = Idx + 2; - } else { - // Vector type, create a CONCAT_VECTORS of type NextVT - SDValue undefVec = DAG.getUNDEF(VT); - unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); - SmallVector SubConcatOps(OpsToConcat); - unsigned RealVals = ConcatEnd - Idx - 1; - unsigned SubConcatEnd = 0; - unsigned SubConcatIdx = Idx + 1; - while (SubConcatEnd < RealVals) - SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; - while (SubConcatEnd < OpsToConcat) - SubConcatOps[SubConcatEnd++] = undefVec; - ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, - NextVT, SubConcatOps); - ConcatEnd = SubConcatIdx + 1; - } - } - - // Check to see if we have a single operation with the widen type. - if (ConcatEnd == 1) { - VT = ConcatOps[0].getValueType(); - if (VT == WidenVT) - return ConcatOps[0]; - } - - // add undefs of size MaxVT until ConcatOps grows to length of WidenVT - unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); - if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(MaxVT); - for (unsigned j = ConcatEnd; j < NumOps; ++j) - ConcatOps[j] = UndefVal; - } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - makeArrayRef(ConcatOps.data(), NumOps)); -} - -SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { - SDValue InOp = N->getOperand(0); - SDLoc DL(N); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); - - unsigned Opcode = N->getOpcode(); - unsigned InVTNumElts = InVT.getVectorNumElements(); - - if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { - InOp = GetWidenedVector(N->getOperand(0)); - InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) { - if (N->getNumOperands() == 1) - return DAG.getNode(Opcode, DL, WidenVT, InOp); - return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); - } - } - - if (TLI.isTypeLegal(InWidenVT)) { - // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if - // it results in a legal type. - if (WidenNumElts % InVTNumElts == 0) { - // Widen the input and call convert on the widened input vector. - unsigned NumConcat = WidenNumElts/InVTNumElts; - SmallVector Ops(NumConcat); - Ops[0] = InOp; - SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; - SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); - if (N->getNumOperands() == 1) - return DAG.getNode(Opcode, DL, WidenVT, InVec); - return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); - } - - if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getConstant(0, - TLI.getVectorIdxTy())); - // Extract the input and convert the shorten input vector. - if (N->getNumOperands() == 1) - return DAG.getNode(Opcode, DL, WidenVT, InVal); - return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); - } - } - - // Otherwise unroll into some nasty scalar code and rebuild the vector. - SmallVector Ops(WidenNumElts); - EVT EltVT = WidenVT.getVectorElementType(); - unsigned MinElts = std::min(InVTNumElts, WidenNumElts); - unsigned i; - for (i=0; i < MinElts; ++i) { - SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); - if (N->getNumOperands() == 1) - Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); - else - Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); - } - - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i < WidenNumElts; ++i) - Ops[i] = UndefVal; - - return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); -} - -SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp = GetWidenedVector(N->getOperand(0)); - SDValue ShOp = N->getOperand(1); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); -} - -SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp = GetWidenedVector(N->getOperand(0)); - SDValue ShOp = N->getOperand(1); - - EVT ShVT = ShOp.getValueType(); - if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { - ShOp = GetWidenedVector(ShOp); - ShVT = ShOp.getValueType(); - } - EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), - ShVT.getVectorElementType(), - WidenVT.getVectorNumElements()); - if (ShVT != ShWidenVT) - ShOp = ModifyToType(ShOp, ShWidenVT); - - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); -} - -SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { - // Unary op widening. - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); -} - -SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), - cast(N->getOperand(1))->getVT() - .getVectorElementType(), - WidenVT.getVectorNumElements()); - SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - WidenVT, WidenLHS, DAG.getValueType(ExtVT)); -} - -SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { - SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); - return GetWidenedVector(WidenVec); -} - -SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { - SDValue InOp = N->getOperand(0); - EVT InVT = InOp.getValueType(); - EVT VT = N->getValueType(0); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - SDLoc dl(N); - - switch (getTypeAction(InVT)) { - case TargetLowering::TypeLegal: - break; - case TargetLowering::TypePromoteInteger: - // If the incoming type is a vector that is being promoted, then - // we know that the elements are arranged differently and that we - // must perform the conversion using a stack slot. - if (InVT.isVector()) - break; - - // If the InOp is promoted to the same size, convert it. Otherwise, - // fall out of the switch and widen the promoted input. - InOp = GetPromotedInteger(InOp); - InVT = InOp.getValueType(); - if (WidenVT.bitsEq(InVT)) - return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); - break; - case TargetLowering::TypeSoftenFloat: - case TargetLowering::TypeExpandInteger: - case TargetLowering::TypeExpandFloat: - case TargetLowering::TypeScalarizeVector: - case TargetLowering::TypeSplitVector: - break; - case TargetLowering::TypeWidenVector: - // If the InOp is widened to the same size, convert it. Otherwise, fall - // out of the switch and widen the widened input. - InOp = GetWidenedVector(InOp); - InVT = InOp.getValueType(); - if (WidenVT.bitsEq(InVT)) - // The input widens to the same size. Convert to the widen value. - return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); - break; - } - - unsigned WidenSize = WidenVT.getSizeInBits(); - unsigned InSize = InVT.getSizeInBits(); - // x86mmx is not an acceptable vector element type, so don't try. - if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { - // Determine new input vector type. The new input vector type will use - // the same element type (if its a vector) or use the input type as a - // vector. It is the same size as the type to widen to. - EVT NewInVT; - unsigned NewNumElts = WidenSize / InSize; - if (InVT.isVector()) { - EVT InEltVT = InVT.getVectorElementType(); - NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, - WidenSize / InEltVT.getSizeInBits()); - } else { - NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); - } - - if (TLI.isTypeLegal(NewInVT)) { - // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if - // it results in a legal type. - SmallVector Ops(NewNumElts); - SDValue UndefVal = DAG.getUNDEF(InVT); - Ops[0] = InOp; - for (unsigned i = 1; i < NewNumElts; ++i) - Ops[i] = UndefVal; - - SDValue NewVec; - if (InVT.isVector()) - NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); - else - NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); - return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); - } - } - - return CreateStackStoreLoad(InOp, WidenVT); -} - -SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { - SDLoc dl(N); - // Build a vector with undefined for the new nodes. - EVT VT = N->getValueType(0); - - // Integer BUILD_VECTOR operands may be larger than the node's vector element - // type. The UNDEFs need to have the same type as the existing operands. - EVT EltVT = N->getOperand(0).getValueType(); - unsigned NumElts = VT.getVectorNumElements(); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - SmallVector NewOps(N->op_begin(), N->op_end()); - assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); - NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps); -} - -SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { - EVT InVT = N->getOperand(0).getValueType(); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDLoc dl(N); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - unsigned NumInElts = InVT.getVectorNumElements(); - unsigned NumOperands = N->getNumOperands(); - - bool InputWidened = false; // Indicates we need to widen the input. - if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { - if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { - // Add undef vectors to widen to correct length. - unsigned NumConcat = WidenVT.getVectorNumElements() / - InVT.getVectorNumElements(); - SDValue UndefVal = DAG.getUNDEF(InVT); - SmallVector Ops(NumConcat); - for (unsigned i=0; i < NumOperands; ++i) - Ops[i] = N->getOperand(i); - for (unsigned i = NumOperands; i != NumConcat; ++i) - Ops[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); - } - } else { - InputWidened = true; - if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { - // The inputs and the result are widen to the same value. - unsigned i; - for (i=1; i < NumOperands; ++i) - if (N->getOperand(i).getOpcode() != ISD::UNDEF) - break; - - if (i == NumOperands) - // Everything but the first operand is an UNDEF so just return the - // widened first operand. - return GetWidenedVector(N->getOperand(0)); - - if (NumOperands == 2) { - // Replace concat of two operands with a shuffle. - SmallVector MaskOps(WidenNumElts, -1); - for (unsigned i = 0; i < NumInElts; ++i) { - MaskOps[i] = i; - MaskOps[i + NumInElts] = i + WidenNumElts; - } - return DAG.getVectorShuffle(WidenVT, dl, - GetWidenedVector(N->getOperand(0)), - GetWidenedVector(N->getOperand(1)), - &MaskOps[0]); - } - } - } - - // Fall back to use extracts and build vector. - EVT EltVT = WidenVT.getVectorElementType(); - SmallVector Ops(WidenNumElts); - unsigned Idx = 0; - for (unsigned i=0; i < NumOperands; ++i) { - SDValue InOp = N->getOperand(i); - if (InputWidened) - InOp = GetWidenedVector(InOp); - for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); - } - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; Idx < WidenNumElts; ++Idx) - Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); -} - -SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { - SDLoc dl(N); - SDValue InOp = N->getOperand(0); - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); - - SDValue DTyOp = DAG.getValueType(WidenVT); - SDValue STyOp = DAG.getValueType(InWidenVT); - ISD::CvtCode CvtCode = cast(N)->getCvtCode(); - - unsigned InVTNumElts = InVT.getVectorNumElements(); - if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { - InOp = GetWidenedVector(InOp); - InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (TLI.isTypeLegal(InWidenVT)) { - // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if - // it results in a legal type. - if (WidenNumElts % InVTNumElts == 0) { - // Widen the input and call convert on the widened input vector. - unsigned NumConcat = WidenNumElts/InVTNumElts; - SmallVector Ops(NumConcat); - Ops[0] = InOp; - SDValue UndefVal = DAG.getUNDEF(InVT); - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; - - InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - if (InVTNumElts % WidenNumElts == 0) { - // Extract the input and convert the shorten input vector. - InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - } - - // Otherwise unroll into some nasty scalar code and rebuild the vector. - SmallVector Ops(WidenNumElts); - EVT EltVT = WidenVT.getVectorElementType(); - DTyOp = DAG.getValueType(EltVT); - STyOp = DAG.getValueType(InEltVT); - - unsigned MinElts = std::min(InVTNumElts, WidenNumElts); - unsigned i; - for (i=0; i < MinElts; ++i) { - SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy())); - Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, - SatOp, CvtCode); - } - - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i < WidenNumElts; ++i) - Ops[i] = UndefVal; - - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); -} - -SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - EVT VT = N->getValueType(0); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - SDValue InOp = N->getOperand(0); - SDValue Idx = N->getOperand(1); - SDLoc dl(N); - - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) - InOp = GetWidenedVector(InOp); - - EVT InVT = InOp.getValueType(); - - // Check if we can just return the input vector after widening. - uint64_t IdxVal = cast(Idx)->getZExtValue(); - if (IdxVal == 0 && InVT == WidenVT) - return InOp; - - // Check if we can extract from the vector. - unsigned InNumElts = InVT.getVectorNumElements(); - if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); - - // We could try widening the input to the right length but for now, extract - // the original elements, fill the rest with undefs and build a vector. - SmallVector Ops(WidenNumElts); - EVT EltVT = VT.getVectorElementType(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned i; - for (i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); - - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i < WidenNumElts; ++i) - Ops[i] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); -} - -SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { - SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), - InOp.getValueType(), InOp, - N->getOperand(1), N->getOperand(2)); -} - -SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { - LoadSDNode *LD = cast(N); - ISD::LoadExtType ExtType = LD->getExtensionType(); - - SDValue Result; - SmallVector LdChain; // Chain for the series of load - if (ExtType != ISD::NON_EXTLOAD) - Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); - else - Result = GenWidenVectorLoads(LdChain, LD); - - // If we generate a single load, we can use that for the chain. Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) - NewChain = LdChain[0]; - else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); - - // Modified the chain - switch anything that used the old chain to use - // the new one. - ReplaceValueWith(SDValue(N, 1), NewChain); - - return Result; -} - -SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), - WidenVT, N->getOperand(0)); -} - -SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - SDValue Cond1 = N->getOperand(0); - EVT CondVT = Cond1.getValueType(); - if (CondVT.isVector()) { - EVT CondEltVT = CondVT.getVectorElementType(); - EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), - CondEltVT, WidenNumElts); - if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) - Cond1 = GetWidenedVector(Cond1); - - // If we have to split the condition there is no point in widening the - // select. This would result in an cycle of widening the select -> - // widening the condition operand -> splitting the condition operand -> - // splitting the select -> widening the select. Instead split this select - // further and widen the resulting type. - if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) { - SDValue SplitSelect = SplitVecOp_VSELECT(N, 0); - SDValue Res = ModifyToType(SplitSelect, WidenVT); - return Res; - } - - if (Cond1.getValueType() != CondWidenVT) - Cond1 = ModifyToType(Cond1, CondWidenVT); - } - - SDValue InOp1 = GetWidenedVector(N->getOperand(1)); - SDValue InOp2 = GetWidenedVector(N->getOperand(2)); - assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(N->getOpcode(), SDLoc(N), - WidenVT, Cond1, InOp1, InOp2); -} - -SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { - SDValue InOp1 = GetWidenedVector(N->getOperand(2)); - SDValue InOp2 = GetWidenedVector(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, SDLoc(N), - InOp1.getValueType(), N->getOperand(0), - N->getOperand(1), InOp1, InOp2, N->getOperand(4)); -} - -SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { - assert(N->getValueType(0).isVector() == - N->getOperand(0).getValueType().isVector() && - "Scalar/Vector type mismatch"); - if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, - InOp1, InOp2, N->getOperand(2)); -} - -SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getUNDEF(WidenVT); -} - -SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { - EVT VT = N->getValueType(0); - SDLoc dl(N); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - unsigned NumElts = VT.getVectorNumElements(); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - - // Adjust mask based on new input vector length. - SmallVector NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = N->getMaskElt(i); - if (Idx < (int)NumElts) - NewMask.push_back(Idx); - else - NewMask.push_back(Idx - NumElts + WidenNumElts); - } - for (unsigned i = NumElts; i != WidenNumElts; ++i) - NewMask.push_back(-1); - return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); -} - -SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { - assert(N->getValueType(0).isVector() && - N->getOperand(0).getValueType().isVector() && - "Operands must be vectors"); - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - - SDValue InOp1 = N->getOperand(0); - EVT InVT = InOp1.getValueType(); - assert(InVT.isVector() && "can not widen non-vector type"); - EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), WidenNumElts); - InOp1 = GetWidenedVector(InOp1); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - - // Assume that the input and output will be widen appropriately. If not, - // we will have to unroll it at some point. - assert(InOp1.getValueType() == WidenInVT && - InOp2.getValueType() == WidenInVT && - "Input not widened to expected type!"); - (void)WidenInVT; - return DAG.getNode(ISD::SETCC, SDLoc(N), - WidenVT, InOp1, InOp2, N->getOperand(2)); -} - - -//===----------------------------------------------------------------------===// -// Widen Vector Operand -//===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); - SDValue Res = SDValue(); - - // See if the target wants to custom widen this node. - if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) - return false; - - switch (N->getOpcode()) { - default: -#ifndef NDEBUG - dbgs() << "WidenVectorOperand op #" << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"; -#endif - llvm_unreachable("Do not know how to widen this operator's operand!"); - - case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; - case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; - case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; - case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; - case ISD::STORE: Res = WidenVecOp_STORE(N); break; - case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; - - case ISD::ANY_EXTEND: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - Res = WidenVecOp_EXTEND(N); - break; - - case ISD::FP_EXTEND: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - case ISD::TRUNCATE: - Res = WidenVecOp_Convert(N); - break; - } - - // If Res is null, the sub-method took care of registering the result. - if (!Res.getNode()) return false; - - // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. - if (Res.getNode() == N) - return true; - - - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); - - ReplaceValueWith(SDValue(N, 0), Res); - return false; -} - -SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { - SDLoc DL(N); - EVT VT = N->getValueType(0); - - SDValue InOp = N->getOperand(0); - // If some legalization strategy other than widening is used on the operand, - // we can't safely assume that just extending the low lanes is the correct - // transformation. - if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) - return WidenVecOp_Convert(N); - InOp = GetWidenedVector(InOp); - assert(VT.getVectorNumElements() < - InOp.getValueType().getVectorNumElements() && - "Input wasn't widened!"); - - // We may need to further widen the operand until it has the same total - // vector size as the result. - EVT InVT = InOp.getValueType(); - if (InVT.getSizeInBits() != VT.getSizeInBits()) { - EVT InEltVT = InVT.getVectorElementType(); - for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { - EVT FixedVT = (MVT::SimpleValueType)i; - EVT FixedEltVT = FixedVT.getVectorElementType(); - if (TLI.isTypeLegal(FixedVT) && - FixedVT.getSizeInBits() == VT.getSizeInBits() && - FixedEltVT == InEltVT) { - assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && - "Not enough elements in the fixed type for the operand!"); - assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && - "We can't have the same type as we started with!"); - if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) - InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, - DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - else - InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - break; - } - } - InVT = InOp.getValueType(); - if (InVT.getSizeInBits() != VT.getSizeInBits()) - // We couldn't find a legal vector type that was a widening of the input - // and could be extended in-register to the result type, so we have to - // scalarize. - return WidenVecOp_Convert(N); - } - - // Use special DAG nodes to represent the operation of extending the - // low lanes. - switch (N->getOpcode()) { - default: - llvm_unreachable("Extend legalization on on extend operation!"); - case ISD::ANY_EXTEND: - return DAG.getAnyExtendVectorInReg(InOp, DL, VT); - case ISD::SIGN_EXTEND: - return DAG.getSignExtendVectorInReg(InOp, DL, VT); - case ISD::ZERO_EXTEND: - return DAG.getZeroExtendVectorInReg(InOp, DL, VT); - } -} - -SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { - // Since the result is legal and the input is illegal, it is unlikely - // that we can fix the input to a legal type so unroll the convert - // into some scalar code and create a nasty build vector. - EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); - SDLoc dl(N); - unsigned NumElts = VT.getVectorNumElements(); - SDValue InOp = N->getOperand(0); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) - InOp = GetWidenedVector(InOp); - EVT InVT = InOp.getValueType(); - EVT InEltVT = InVT.getVectorElementType(); - - unsigned Opcode = N->getOpcode(); - SmallVector Ops(NumElts); - for (unsigned i=0; i < NumElts; ++i) - Ops[i] = DAG.getNode(Opcode, dl, EltVT, - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, TLI.getVectorIdxTy()))); - - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); -} - -SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { - EVT VT = N->getValueType(0); - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InWidenVT = InOp.getValueType(); - SDLoc dl(N); - - // Check if we can convert between two legal vector types and extract. - unsigned InWidenSize = InWidenVT.getSizeInBits(); - unsigned Size = VT.getSizeInBits(); - // x86mmx is not an acceptable vector element type, so don't try. - if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { - unsigned NewNumElts = InWidenSize / Size; - EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeLegal(NewVT)) { - SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - } - } - - return CreateStackStoreLoad(InOp, VT); -} - -SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { - // If the input vector is not legal, it is likely that we will not find a - // legal vector of the same size. Replace the concatenate vector with a - // nasty build vector. - EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); - SDLoc dl(N); - unsigned NumElts = VT.getVectorNumElements(); - SmallVector Ops(NumElts); - - EVT InVT = N->getOperand(0).getValueType(); - unsigned NumInElts = InVT.getVectorNumElements(); - - unsigned Idx = 0; - unsigned NumOperands = N->getNumOperands(); - for (unsigned i=0; i < NumOperands; ++i) { - SDValue InOp = N->getOperand(i); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) - InOp = GetWidenedVector(InOp); - for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, TLI.getVectorIdxTy())); - } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); -} - -SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { - SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), - N->getValueType(0), InOp, N->getOperand(1)); -} - -SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { - SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), - N->getValueType(0), InOp, N->getOperand(1)); -} - -SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { - // We have to widen the value but we want only to store the original - // vector type. - StoreSDNode *ST = cast(N); - - SmallVector StChain; - if (ST->isTruncatingStore()) - GenWidenVectorTruncStores(StChain, ST); - else - GenWidenVectorStores(StChain, ST); - - if (StChain.size() == 1) - return StChain[0]; - else - return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); -} - -SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { - SDValue InOp0 = GetWidenedVector(N->getOperand(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(1)); - SDLoc dl(N); - - // WARNING: In this code we widen the compare instruction with garbage. - // This garbage may contain denormal floats which may be slow. Is this a real - // concern ? Should we zero the unused lanes if this is a float compare ? - - // Get a new SETCC node to compare the newly widened operands. - // Only some of the compared elements are legal. - EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType()); - SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), - SVT, InOp0, InOp1, N->getOperand(2)); - - // Extract the needed results from the result vector. - EVT ResVT = EVT::getVectorVT(*DAG.getContext(), - SVT.getVectorElementType(), - N->getValueType(0).getVectorNumElements()); - SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getConstant(0, - TLI.getVectorIdxTy())); - - return PromoteTargetBoolean(CC, N->getValueType(0)); -} - - -//===----------------------------------------------------------------------===// -// Vector Widening Utilities -//===----------------------------------------------------------------------===// - -// Utility function to find the type to chop up a widen vector for load/store -// TLI: Target lowering used to determine legal types. -// Width: Width left need to load/store. -// WidenVT: The widen vector type to load to/store from -// Align: If 0, don't allow use of a wider type -// WidenEx: If Align is not 0, the amount additional we can load/store from. - -static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, - unsigned Width, EVT WidenVT, - unsigned Align = 0, unsigned WidenEx = 0) { - EVT WidenEltVT = WidenVT.getVectorElementType(); - unsigned WidenWidth = WidenVT.getSizeInBits(); - unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); - unsigned AlignInBits = Align*8; - - // If we have one element to load/store, return it. - EVT RetVT = WidenEltVT; - if (Width == WidenEltWidth) - return RetVT; - - // See if there is larger legal integer than the element type to load/store - unsigned VT; - for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; - VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { - EVT MemVT((MVT::SimpleValueType) VT); - unsigned MemVTWidth = MemVT.getSizeInBits(); - if (MemVT.getSizeInBits() <= WidenEltWidth) - break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && - isPowerOf2_32(WidenWidth / MemVTWidth) && - (MemVTWidth <= Width || - (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { - RetVT = MemVT; - break; - } - } - - // See if there is a larger vector type to load/store that has the same vector - // element type and is evenly divisible with the WidenVT. - for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; - VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { - EVT MemVT = (MVT::SimpleValueType) VT; - unsigned MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && - (WidenWidth % MemVTWidth) == 0 && - isPowerOf2_32(WidenWidth / MemVTWidth) && - (MemVTWidth <= Width || - (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { - if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) - return MemVT; - } - } - - return RetVT; -} - -// Builds a vector type from scalar loads -// VecTy: Resulting Vector type -// LDOps: Load operators to build a vector type -// [Start,End) the list of loads to use. -static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, - SmallVectorImpl &LdOps, - unsigned Start, unsigned End) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDLoc dl(LdOps[Start]); - EVT LdTy = LdOps[Start].getValueType(); - unsigned Width = VecTy.getSizeInBits(); - unsigned NumElts = Width / LdTy.getSizeInBits(); - EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); - - unsigned Idx = 1; - SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); - - for (unsigned i = Start + 1; i != End; ++i) { - EVT NewLdTy = LdOps[i].getValueType(); - if (NewLdTy != LdTy) { - NumElts = Width / NewLdTy.getSizeInBits(); - NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); - VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); - // Readjust position and vector position based on new load type - Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); - LdTy = NewLdTy; - } - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); - } - return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); -} - -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD) { - // The strategy assumes that we can efficiently load powers of two widths. - // The routines chops the vector into the largest vector loads with the same - // element type or scalar loads and then recombines it to the widen vector - // type. - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); - unsigned WidenWidth = WidenVT.getSizeInBits(); - EVT LdVT = LD->getMemoryVT(); - SDLoc dl(LD); - assert(LdVT.isVector() && WidenVT.isVector()); - assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); - - // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); - AAMDNodes AAInfo = LD->getAAInfo(); - - int LdWidth = LdVT.getSizeInBits(); - int WidthDiff = WidenWidth - LdWidth; // Difference - unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads - - // Find the vector type that can load from. - EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); - int NewVTWidth = NewVT.getSizeInBits(); - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Align, - AAInfo); - LdChain.push_back(LdOp.getValue(1)); - - // Check if we can load the element with one instruction - if (LdWidth <= NewVTWidth) { - if (!NewVT.isVector()) { - unsigned NumElts = WidenWidth / NewVTWidth; - EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); - SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); - return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); - } - if (NewVT == WidenVT) - return LdOp; - - assert(WidenWidth % NewVTWidth == 0); - unsigned NumConcat = WidenWidth / NewVTWidth; - SmallVector ConcatOps(NumConcat); - SDValue UndefVal = DAG.getUNDEF(NewVT); - ConcatOps[0] = LdOp; - for (unsigned i = 1; i != NumConcat; ++i) - ConcatOps[i] = UndefVal; - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); - } - - // Load vector by using multiple loads from largest vector to scalar - SmallVector LdOps; - LdOps.push_back(LdOp); - - LdWidth -= NewVTWidth; - unsigned Offset = 0; - - while (LdWidth > 0) { - unsigned Increment = NewVTWidth / 8; - Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); - - SDValue L; - if (LdWidth < NewVTWidth) { - // Our current type we are using is too large, find a better size - NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); - NewVTWidth = NewVT.getSizeInBits(); - L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment), - AAInfo); - LdChain.push_back(L.getValue(1)); - if (L->getValueType(0).isVector()) { - SmallVector Loads; - Loads.push_back(L); - unsigned size = L->getValueSizeInBits(0); - while (size < LdOp->getValueSizeInBits(0)) { - Loads.push_back(DAG.getUNDEF(L->getValueType(0))); - size += L->getValueSizeInBits(0); - } - L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); - } - } else { - L = DAG.getLoad(NewVT, dl, Chain, BasePtr, - LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment), - AAInfo); - LdChain.push_back(L.getValue(1)); - } - - LdOps.push_back(L); - - - LdWidth -= NewVTWidth; - } - - // Build the vector from the loads operations - unsigned End = LdOps.size(); - if (!LdOps[0].getValueType().isVector()) - // All the loads are scalar loads. - return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); - - // If the load contains vectors, build the vector using concat vector. - // All of the vectors used to loads are power of 2 and the scalars load - // can be combined to make a power of 2 vector. - SmallVector ConcatOps(End); - int i = End - 1; - int Idx = End; - EVT LdTy = LdOps[i].getValueType(); - // First combine the scalar loads to a vector - if (!LdTy.isVector()) { - for (--i; i >= 0; --i) { - LdTy = LdOps[i].getValueType(); - if (LdTy.isVector()) - break; - } - ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); - } - ConcatOps[--Idx] = LdOps[i]; - for (--i; i >= 0; --i) { - EVT NewLdTy = LdOps[i].getValueType(); - if (NewLdTy != LdTy) { - // Create a larger vector - ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, - makeArrayRef(&ConcatOps[Idx], End - Idx)); - Idx = End - 1; - LdTy = NewLdTy; - } - ConcatOps[--Idx] = LdOps[i]; - } - - if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - makeArrayRef(&ConcatOps[Idx], End - Idx)); - - // We need to fill the rest with undefs to build the vector - unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); - SmallVector WidenOps(NumOps); - SDValue UndefVal = DAG.getUNDEF(LdTy); - { - unsigned i = 0; - for (; i != End-Idx; ++i) - WidenOps[i] = ConcatOps[Idx+i]; - for (; i != NumOps; ++i) - WidenOps[i] = UndefVal; - } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); -} - -SDValue -DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, - LoadSDNode *LD, - ISD::LoadExtType ExtType) { - // For extension loads, it may not be more efficient to chop up the vector - // and then extended it. Instead, we unroll the load and build a new vector. - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); - EVT LdVT = LD->getMemoryVT(); - SDLoc dl(LD); - assert(LdVT.isVector() && WidenVT.isVector()); - - // Load information - SDValue Chain = LD->getChain(); - SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); - bool isVolatile = LD->isVolatile(); - bool isNonTemporal = LD->isNonTemporal(); - bool isInvariant = LD->isInvariant(); - AAMDNodes AAInfo = LD->getAAInfo(); - - EVT EltVT = WidenVT.getVectorElementType(); - EVT LdEltVT = LdVT.getVectorElementType(); - unsigned NumElts = LdVT.getVectorNumElements(); - - // Load each element and widen - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - SmallVector Ops(WidenNumElts); - unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, - LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, isInvariant, - Align, AAInfo); - LdChain.push_back(Ops[0].getValue(1)); - unsigned i = 0, Offset = Increment; - for (i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, - DAG.getConstant(Offset, - BasePtr.getValueType())); - Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, - LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, isInvariant, Align, - AAInfo); - LdChain.push_back(Ops[i].getValue(1)); - } - - // Fill the rest with undefs - SDValue UndefVal = DAG.getUNDEF(EltVT); - for (; i != WidenNumElts; ++i) - Ops[i] = UndefVal; - - return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); -} - - -void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, - StoreSDNode *ST) { - // The strategy assumes that we can efficiently store powers of two widths. - // The routines chops the vector into the largest vector stores with the same - // element type or scalar stores. - SDValue Chain = ST->getChain(); - SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - SDValue ValOp = GetWidenedVector(ST->getValue()); - SDLoc dl(ST); - - EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); - EVT ValVT = ValOp.getValueType(); - unsigned ValWidth = ValVT.getSizeInBits(); - EVT ValEltVT = ValVT.getVectorElementType(); - unsigned ValEltWidth = ValEltVT.getSizeInBits(); - assert(StVT.getVectorElementType() == ValEltVT); - - int Idx = 0; // current index to store - unsigned Offset = 0; // offset from base to store - while (StWidth != 0) { - // Find the largest vector type we can store with - EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); - unsigned NewVTWidth = NewVT.getSizeInBits(); - unsigned Increment = NewVTWidth / 8; - if (NewVT.isVector()) { - unsigned NumVTElts = NewVT.getVectorNumElements(); - do { - SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo().getWithOffset(Offset), - isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); - StWidth -= NewVTWidth; - Offset += Increment; - Idx += NumVTElts; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); - } while (StWidth != 0 && StWidth >= NewVTWidth); - } else { - // Cast the vector to the scalar type we can store - unsigned NumElts = ValWidth / NewVTWidth; - EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); - SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); - // Readjust index position based on new vector type - Idx = Idx * ValEltWidth / NewVTWidth; - do { - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, TLI.getVectorIdxTy())); - StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo().getWithOffset(Offset), - isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); - StWidth -= NewVTWidth; - Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, BasePtr.getValueType())); - } while (StWidth != 0 && StWidth >= NewVTWidth); - // Restore index back to be relative to the original widen element type - Idx = Idx * NewVTWidth / ValEltWidth; - } - } -} - -void -DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, - StoreSDNode *ST) { - // For extension loads, it may not be more efficient to truncate the vector - // and then store it. Instead, we extract each element and then store it. - SDValue Chain = ST->getChain(); - SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - SDValue ValOp = GetWidenedVector(ST->getValue()); - SDLoc dl(ST); - - EVT StVT = ST->getMemoryVT(); - EVT ValVT = ValOp.getValueType(); - - // It must be true that we the widen vector type is bigger than where - // we need to store. - assert(StVT.isVector() && ValOp.getValueType().isVector()); - assert(StVT.bitsLT(ValOp.getValueType())); - - // For truncating stores, we can not play the tricks of chopping legal - // vector types and bit cast it to the right type. Instead, we unroll - // the store. - EVT StEltVT = StVT.getVectorElementType(); - EVT ValEltVT = ValVT.getVectorElementType(); - unsigned Increment = ValEltVT.getSizeInBits() / 8; - unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo(), StEltVT, - isVolatile, isNonTemporal, Align, - AAInfo)); - unsigned Offset = Increment; - for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getConstant(Offset, - BasePtr.getValueType())); - SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, - ST->getPointerInfo().getWithOffset(Offset), - StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset), AAInfo)); - } -} - -/// Modifies a vector input (widen or narrows) to a vector of NVT. The -/// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { - // Note that InOp might have been widened so it might already have - // the right width or it might need be narrowed. - EVT InVT = InOp.getValueType(); - assert(InVT.getVectorElementType() == NVT.getVectorElementType() && - "input and widen element type must match"); - SDLoc dl(InOp); - - // Check if InOp already has the right width. - if (InVT == NVT) - return InOp; - - unsigned InNumElts = InVT.getVectorNumElements(); - unsigned WidenNumElts = NVT.getVectorNumElements(); - if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { - unsigned NumConcat = WidenNumElts / InNumElts; - SmallVector Ops(NumConcat); - SDValue UndefVal = DAG.getUNDEF(InVT); - Ops[0] = InOp; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; - - return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); - } - - if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, TLI.getVectorIdxTy())); - - // Fall back to extract and build. - SmallVector Ops(WidenNumElts); - EVT EltVT = NVT.getVectorElementType(); - unsigned MinNumElts = std::min(WidenNumElts, InNumElts); - unsigned Idx; - for (Idx = 0; Idx < MinNumElts; ++Idx) - Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, TLI.getVectorIdxTy())); - - SDValue UndefVal = DAG.getUNDEF(EltVT); - for ( ; Idx < WidenNumElts; ++Idx) - Ops[Idx] = UndefVal; - return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); -} +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in two vectors of half the size. For example, implementing +// <128 x f32> operations in terms of two <64 x f32> operations. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to scalarize the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; + case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; + case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; + case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::ANY_EXTEND: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + R = ScalarizeVecRes_UnaryOp(N); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; + case ISD::FMA: + R = ScalarizeVecRes_TernaryOp(N); + break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + Op0.getValueType(), Op0, Op1, Op2); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetScalarizedVector(Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BITCAST, SDLoc(N), + NewVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + // The BUILD_VECTOR operands may be of wider element types and + // we may need to truncate them back to the requested return type. + if (EltVT.isInteger()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + return DAG.getConvertRndSat(NewVT, SDLoc(N), + Op0, DAG.getValueType(NewVT), + DAG.getValueType(Op0.getValueType()), + N->getOperand(3), + N->getOperand(4), + cast(N)->getCvtCode()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + NewVT, Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad(ISD::UNINDEXED, + N->getExtensionType(), + N->getValueType(0).getVectorElementType(), + SDLoc(N), + N->getChain(), N->getBasePtr(), + DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->isNonTemporal(), + N->isInvariant(), N->getOriginalAlignment(), + N->getAAInfo()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + // This is a workaround for targets where it's impossible to scalarize the + // result of a conversion, because the source type is legal. + // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} + // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is + // legal and was not scalarized. + // See the similar logic in ScalarizeVecRes_VSETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + EVT ExtVT = cast(N->getOperand(1))->getVT().getVectorElementType(); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT, + LHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = + TLI.getBooleanContents(false, false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); + + // If integer and float booleans have different contents then we can't + // reliably optimize in all cases. There is a full explanation for this in + // DAGCombiner::visitSELECT() where the same issue affects folding + // (select C, 0, 1) to (xor C, 1). + if (TLI.getBooleanContents(false, false) != + TLI.getBooleanContents(false, true)) { + // At least try the common case where the boolean is generated by a + // comparison. + if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0)->getValueType(0); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, + Cond, DAG.getConstant(1, CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + + if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N); + + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + SDLoc DL(N); + + // Turn it into a scalar SETCC. + return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT OpVT = LHS.getValueType(); + EVT NVT = N->getValueType(0).getVectorElementType(); + SDLoc DL(N); + + // The result needs scalarizing, but it's not a given that the source does. + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + LHS = GetScalarizedVector(LHS); + RHS = GetScalarizedVector(RHS); + } else { + EVT VT = OpVT.getVectorElementType(); + LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, NVT, Res); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (!Res.getNode()) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to scalarize this operator's operand!"); + case ISD::BITCAST: + Res = ScalarizeVecOp_BITCAST(N); + break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::TRUNCATE: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Res = ScalarizeVecOp_UnaryOp(N); + break; + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; + case ISD::VSELECT: + Res = ScalarizeVecOp_VSELECT(N); + break; + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast(N), OpNo); + break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::BITCAST, SDLoc(N), + N->getValueType(0), Elt); +} + +/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be +/// scalarized, it must be <1 x ty>. Do the operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0).getScalarType(), Elt); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); +} + +/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - +/// use a BUILD_VECTOR instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { + SmallVector Ops(N->getNumOperands()); + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + Ops[i] = GetScalarizedVector(N->getOperand(i)); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); +} + +/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to +/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the +/// index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), + Res); + return Res; +} + + +/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be +/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT +/// (still with vector output type since that was acceptable if we got here). +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { + SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); + EVT VT = N->getValueType(0); + + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), + N->getOperand(2)); +} + +/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be +/// scalarized, it must be <1 x ty>. Just store the element. +SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(N->isUnindexed() && "Indexed store of one-element vector?"); + assert(OpNo == 1 && "Do not know how to scalarize this operand!"); + SDLoc dl(N); + + if (N->isTruncatingStore()) + return DAG.getTruncStore(N->getChain(), dl, + GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getAAInfo()); + + return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment(), N->getAAInfo()); +} + +/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt, + N->getOperand(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + +//===----------------------------------------------------------------------===// +// Result Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorResult - This method is called when the specified result of the +/// specified node is found to need vector splitting. At this point, the node +/// may also have invalid operands or may have other results that need +/// legalization, we just know that (at least) one result needs vector +/// splitting. +void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Split node result: "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Lo, Hi; + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::VSELECT: + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; + case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; + case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::LOAD: + SplitVecRes_LOAD(cast(N), Lo, Hi); + break; + case ISD::MLOAD: + SplitVecRes_MLOAD(cast(N), Lo, Hi); + break; + case ISD::SETCC: + SplitVecRes_SETCC(N, Lo, Hi); + break; + case ISD::VECTOR_SHUFFLE: + SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); + break; + + case ISD::BSWAP: + case ISD::CONVERT_RNDSAT: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + SplitVecRes_UnaryOp(N, Lo, Hi); + break; + + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: + SplitVecRes_BinOp(N, Lo, Hi); + break; + case ISD::FMA: + SplitVecRes_TernaryOp(N, Lo, Hi); + break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + SDLoc dl(N); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op0Lo, Op0Hi; + GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); + SDValue Op1Lo, Op1Hi; + GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); + SDValue Op2Lo, Op2Hi; + GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); + SDLoc dl(N); + + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), + Op0Lo, Op1Lo, Op2Lo); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), + Op0Hi, Op1Hi, Op2Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDLoc dl(N); + + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeWidenVector: + break; + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + break; + case TargetLowering::TypeSplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. + EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + if (TLI.isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps); + + SmallVector HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + SDLoc dl(N); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + SmallVector LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); + + SmallVector HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + uint64_t IdxVal = cast(Idx)->getZExtValue(); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + TLI.getVectorIdxTy())); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT SubVecVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new subvector into the specified index. + SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = + DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, StackPtr.getValueType())); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc dl(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast(N->getOperand(1))->getVT()); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + DAG.getValueType(LoVT)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + DAG.getValueType(HiVT)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getConstant(IdxVal - LoNumElts, + TLI.getVectorIdxTy())); + return; + } + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return; + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = + TLI.getDataLayout()->getPrefTypeAlignment(VecType); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, StackPtr.getValueType())); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, + isInvariant, Alignment, AAInfo); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MLD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Ch = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + SDValue Mask = MLD->getMask(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MLD->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO); + + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MLD, 1), Ch); + +} + +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + + EVT LoVT, HiVT; + SDLoc DL(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the input. + SDValue LL, LH, RL, RH; + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. + EVT InVT = N->getOperand(0).getValueType(); + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), Lo, Hi); + else + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + + if (N->getOpcode() == ISD::FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); + SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast(N)->getCvtCode(); + Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + } +} + +void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DestVT = N->getValueType(0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + + // We can do better than a generic split operation if the extend is doing + // more than just doubling the width of the elements and the following are + // true: + // - The number of vector elements is even, + // - the source type is legal, + // - the type of a split source is illegal, + // - the type of an extended (by doubling element size) source is legal, and + // - the type of that extended source when split is legal. + // + // This won't necessarily completely legalize the operation, but it will + // more effectively move in the right direction and prevent falling down + // to scalarization in many cases due to the input vector being split too + // far. + unsigned NumElements = SrcVT.getVectorNumElements(); + if ((NumElements & 1) == 0 && + SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + LLVMContext &Ctx = *DAG.getContext(); + EVT NewSrcVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT( + Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + NumElements); + EVT SplitSrcVT = + EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT SplitLoVT, SplitHiVT; + std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && + TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { + DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + } + // Fall back to the generic unary operator splitting otherwise. + SplitVecRes_UnaryOp(N, Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + EVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + EVT EltVT = NewVT.getVectorElementType(); + SmallVector SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. + SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], DAG.getConstant(Idx, + TLI.getVectorIdxTy()))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Split node operand: "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom split this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + if (!Res.getNode()) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break; + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast(N), OpNo); + break; + case ISD::MSTORE: + Res = SplitVecOp_MSTORE(cast(N), OpNo); + break; + case ISD::VSELECT: + Res = SplitVecOp_VSELECT(N, OpNo); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = SplitVecOp_UnaryOp(N); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { + // The only possibility for an illegal operand is the mask, since result type + // legalization would have handled this node already otherwise. + assert(OpNo == 0 && "Illegal operand must be mask"); + + SDValue Mask = N->getOperand(0); + SDValue Src0 = N->getOperand(1); + SDValue Src1 = N->getOperand(2); + EVT Src0VT = Src0.getValueType(); + SDLoc DL(N); + assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); + + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Lo and Hi have differing types"); + + EVT LoOpVT, HiOpVT; + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); + + SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; + std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + + SDValue LoSelect = + DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); + SDValue HiSelect = + DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { + // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. + EVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec.getValueType(); + + if (isa(Idx)) { + uint64_t IdxVal = cast(Idx)->getZExtValue(); + assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, + DAG.getConstant(IdxVal - LoElts, + Idx.getValueType())), 0); + } + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return SDValue(); + + // Store the vector to the stack. + EVT EltVT = VecVT.getVectorElementType(); + SDLoc dl(N); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Load back the required element. + StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo(), EltVT, false, false, false, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Data = N->getData(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + GetSplitVector(Data, DataLo, DataHi); + SDValue MaskLo, MaskHi; + GetSplitVector(Mask, MaskLo, MaskHi); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, N->getAAInfo(), N->getRanges()); + + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO); + + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + +} + +SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed store of vector?"); + assert(OpNo == 1 && "Can only split the stored value"); + SDLoc DL(N); + + bool isTruncating = N->isTruncatingStore(); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + bool isVol = N->isVolatile(); + bool isNT = N->isNonTemporal(); + AAMDNodes AAInfo = N->getAAInfo(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + + if (isTruncating) + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), + LoMemVT, isVol, isNT, Alignment, AAInfo); + else + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), + isVol, isNT, Alignment, AAInfo); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + if (isTruncating) + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVol, isNT, Alignment, AAInfo); + else + Hi = DAG.getStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + isVol, isNT, Alignment, AAInfo); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { + SDLoc DL(N); + + // The input operands all must have the same type, and we know the result + // type is valid. Convert this to a buildvector which extracts all the + // input elements. + // TODO: If the input elements are power-two vectors, we could convert this to + // a new CONCAT_VECTORS node with elements that are half-wide. + SmallVector Elts; + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) { + SDValue Op = N->getOperand(op); + for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); + i != e; ++i) { + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, + Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); + + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); +} + +SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { + // The result type is legal, but the input type is illegal. If splitting + // ends up with the result type of each half still being legal, just + // do that. If, however, that would result in an illegal result type, + // we can try to get more clever with power-two vectors. Specifically, + // split the input type, but also widen the result element size, then + // concatenate the halves and truncate again. For example, consider a target + // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit + // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: + // %inlo = v4i32 extract_subvector %in, 0 + // %inhi = v4i32 extract_subvector %in, 4 + // %lo16 = v4i16 trunc v4i32 %inlo + // %hi16 = v4i16 trunc v4i32 %inhi + // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 + // %res = v8i8 trunc v8i16 %in16 + // + // Without this transform, the original truncate would end up being + // scalarized, which is pretty much always a last resort. + SDValue InVec = N->getOperand(0); + EVT InVT = InVec->getValueType(0); + EVT OutVT = N->getValueType(0); + unsigned NumElements = OutVT.getVectorNumElements(); + // Widening should have already made sure this is a power-two vector + // if we're trying to split it at all. assert() that's true, just in case. + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + + unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); + unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + + // If the input elements are only 1/2 the width of the result elements, + // just use the normal splitting. Our trick only work if there's room + // to split more than once. + if (InElementSize <= OutElementSize * 2) + return SplitVecOp_UnaryOp(N); + SDLoc DL(N); + + // Extract the halves of the input via extract_subvector. + SDValue InLoVec, InHiVec; + std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + // Truncate them to 1/2 the element size. + EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, + NumElements/2); + SDValue HalfLo = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, InHiVec); + // Concatenate them to get the full intermediate truncation result. + EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); + SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, + HalfHi); + // Now finish up by truncating all the way down to the original result + // type. This should normally be something that ends up being legal directly, + // but in theory if a target has very wide vectors and an annoyingly + // restricted set of legal types, this split can chain to build things up. + return DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + // The result has a legal vector type, but the input needs splitting. + SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; + SDLoc DL(N); + GetSplitVector(N->getOperand(0), Lo0, Hi0); + GetSplitVector(N->getOperand(1), Lo1, Hi1); + unsigned PartElements = Lo0.getValueType().getVectorNumElements(); + EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); + EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); + + LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); + HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); + return PromoteTargetBoolean(Con, N->getValueType(0)); +} + + +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc DL(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +} + + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Widen node result " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + + // See if the target wants to custom widen this node. + if (CustomWidenLowerNode(N, N->getValueType(ResNo))) + return; + + SDValue Res = SDValue(); + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen the result of this operator!"); + + case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + case ISD::FMINNUM: + case ISD::FMAXNUM: + Res = WidenVecRes_Binary(N); + break; + + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + Res = WidenVecRes_BinaryCanTrap(N); + break; + + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; + + case ISD::ANY_EXTEND: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + Res = WidenVecRes_Convert(N); + break; + + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + Res = WidenVecRes_Unary(N); + break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. + unsigned Opcode = N->getOpcode(); + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + } + + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest legal vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getConstant(Idx, TLI.getVectorIdxTy())); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getConstant(Idx, TLI.getVectorIdxTy())); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + Idx += NumElts; + CurNumElts -= NumElts; + } + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp1, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); + } + CurNumElts = 0; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; + VT = ConcatOps[Idx--].getValueType(); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeLegal(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getConstant(i, + TLI.getVectorIdxTy())); + } + ConcatOps[Idx+1] = VecOp; + ConcatEnd = Idx + 2; + } else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, SubConcatOps); + ConcatEnd = SubConcatIdx + 1; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(MaxVT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(ConcatOps.data(), NumOps)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + SDValue InOp = N->getOperand(0); + SDLoc DL(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); + + unsigned Opcode = N->getOpcode(); + unsigned InVTNumElts = InVT.getVectorNumElements(); + + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp = GetWidenedVector(N->getOperand(0)); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) { + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InOp); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + } + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVec); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); + } + + if (InVTNumElts % WidenNumElts == 0) { + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, + InOp, DAG.getConstant(0, + TLI.getVectorIdxTy())); + // Extract the input and convert the shorten input vector. + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVal); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. + SmallVector Ops(WidenNumElts); + EVT EltVT = WidenVT.getVectorElementType(); + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getConstant(i, TLI.getVectorIdxTy())); + if (N->getNumOperands() == 1) + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); + else + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + + EVT ShVT = ShOp.getValueType(); + if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { + ShOp = GetWidenedVector(ShOp); + ShVT = ShOp.getValueType(); + } + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), + ShVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + if (ShVT != ShWidenVT) + ShOp = ModifyToType(ShOp, ShWidenVT); + + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { + // Unary op widening. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), + cast(N->getOperand(1))->getVT() + .getVectorElementType(), + WidenVT.getVectorNumElements()); + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + WidenVT, WidenLHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { + SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); + return GetWidenedVector(WidenVec); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + break; + case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + + // If the InOp is promoted to the same size, convert it. Otherwise, + // fall out of the switch and widen the promoted input. + InOp = GetPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + break; + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeSplitVector: + break; + case TargetLowering::TypeWidenVector: + // If the InOp is widened to the same size, convert it. Otherwise, fall + // out of the switch and widen the widened input. + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + // The input widens to the same size. Convert to the widen value. + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + break; + } + + unsigned WidenSize = WidenVT.getSizeInBits(); + unsigned InSize = InVT.getSizeInBits(); + // x86mmx is not an acceptable vector element type, so don't try. + if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { + // Determine new input vector type. The new input vector type will use + // the same element type (if its a vector) or use the input type as a + // vector. It is the same size as the type to widen to. + EVT NewInVT; + unsigned NewNumElts = WidenSize / InSize; + if (InVT.isVector()) { + EVT InEltVT = InVT.getVectorElementType(); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, + WidenSize / InEltVT.getSizeInBits()); + } else { + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); + } + + if (TLI.isTypeLegal(NewInVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + SmallVector Ops(NewNumElts); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i < NewNumElts; ++i) + Ops[i] = UndefVal; + + SDValue NewVec; + if (InVT.isVector()) + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); + else + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); + } + } + + return CreateStackStoreLoad(InOp, WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { + SDLoc dl(N); + // Build a vector with undefined for the new nodes. + EVT VT = N->getValueType(0); + + // Integer BUILD_VECTOR operands may be larger than the node's vector element + // type. The UNDEFs need to have the same type as the existing operands. + EVT EltVT = N->getOperand(0).getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SmallVector NewOps(N->op_begin(), N->op_end()); + assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); + NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { + EVT InVT = N->getOperand(0).getValueType(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumInElts = InVT.getVectorNumElements(); + unsigned NumOperands = N->getNumOperands(); + + bool InputWidened = false; // Indicates we need to widen the input. + if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { + if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { + // Add undef vectors to widen to correct length. + unsigned NumConcat = WidenVT.getVectorNumElements() / + InVT.getVectorNumElements(); + SDValue UndefVal = DAG.getUNDEF(InVT); + SmallVector Ops(NumConcat); + for (unsigned i=0; i < NumOperands; ++i) + Ops[i] = N->getOperand(i); + for (unsigned i = NumOperands; i != NumConcat; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); + } + } else { + InputWidened = true; + if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { + // The inputs and the result are widen to the same value. + unsigned i; + for (i=1; i < NumOperands; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + break; + + if (i == NumOperands) + // Everything but the first operand is an UNDEF so just return the + // widened first operand. + return GetWidenedVector(N->getOperand(0)); + + if (NumOperands == 2) { + // Replace concat of two operands with a shuffle. + SmallVector MaskOps(WidenNumElts, -1); + for (unsigned i = 0; i < NumInElts; ++i) { + MaskOps[i] = i; + MaskOps[i + NumInElts] = i + WidenNumElts; + } + return DAG.getVectorShuffle(WidenVT, dl, + GetWidenedVector(N->getOperand(0)), + GetWidenedVector(N->getOperand(1)), + &MaskOps[0]); + } + } + } + + // Fall back to use extracts and build vector. + EVT EltVT = WidenVT.getVectorElementType(); + SmallVector Ops(WidenNumElts); + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (InputWidened) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, TLI.getVectorIdxTy())); + } + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { + SDLoc dl(N); + SDValue InOp = N->getOperand(0); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); + + SDValue DTyOp = DAG.getValueType(WidenVT); + SDValue STyOp = DAG.getValueType(InWidenVT); + ISD::CvtCode CvtCode = cast(N)->getCvtCode(); + + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. + SmallVector Ops(WidenNumElts); + EVT EltVT = WidenVT.getVectorElementType(); + DTyOp = DAG.getValueType(EltVT); + STyOp = DAG.getValueType(InEltVT); + + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, TLI.getVectorIdxTy())); + Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + InOp = GetWidenedVector(InOp); + + EVT InVT = InOp.getValueType(); + + // Check if we can just return the input vector after widening. + uint64_t IdxVal = cast(Idx)->getZExtValue(); + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. + SmallVector Ops(WidenNumElts); + EVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned i; + for (i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), + InOp.getValueType(), InOp, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SDValue Result; + SmallVector LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) + Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); + else + Result = GenWidenVectorLoads(LdChain, LD); + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), NewChain); + + return Result; +} + +SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), + WidenVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue Cond1 = N->getOperand(0); + EVT CondVT = Cond1.getValueType(); + if (CondVT.isVector()) { + EVT CondEltVT = CondVT.getVectorElementType(); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), + CondEltVT, WidenNumElts); + if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) + Cond1 = GetWidenedVector(Cond1); + + // If we have to split the condition there is no point in widening the + // select. This would result in an cycle of widening the select -> + // widening the condition operand -> splitting the condition operand -> + // splitting the select -> widening the select. Instead split this select + // further and widen the resulting type. + if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) { + SDValue SplitSelect = SplitVecOp_VSELECT(N, 0); + SDValue Res = ModifyToType(SplitSelect, WidenVT); + return Res; + } + + if (Cond1.getValueType() != CondWidenVT) + Cond1 = ModifyToType(Cond1, CondWidenVT); + } + + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDValue InOp2 = GetWidenedVector(N->getOperand(2)); + assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); + return DAG.getNode(N->getOpcode(), SDLoc(N), + WidenVT, Cond1, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { + SDValue InOp1 = GetWidenedVector(N->getOperand(2)); + SDValue InOp2 = GetWidenedVector(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), + InOp1.getValueType(), N->getOperand(0), + N->getOperand(1), InOp1, InOp2, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, + InOp1, InOp2, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getUNDEF(WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned NumElts = VT.getVectorNumElements(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + + // Adjust mask based on new input vector length. + SmallVector NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = N->getMaskElt(i); + if (Idx < (int)NumElts) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx - NumElts + WidenNumElts); + } + for (unsigned i = NumElts; i != WidenNumElts; ++i) + NewMask.push_back(-1); + return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); +} + +SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operands must be vectors"); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue InOp1 = N->getOperand(0); + EVT InVT = InOp1.getValueType(); + assert(InVT.isVector() && "can not widen non-vector type"); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), WidenNumElts); + InOp1 = GetWidenedVector(InOp1); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + + // Assume that the input and output will be widen appropriately. If not, + // we will have to unroll it at some point. + assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + (void)WidenInVT; + return DAG.getNode(ISD::SETCC, SDLoc(N), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom widen this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen this operator's operand!"); + + case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; + + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::TRUNCATE: + Res = WidenVecOp_Convert(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just extending the low lanes is the correct + // transformation. + if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); + + // We may need to further widen the operand until it has the same total + // vector size as the result. + EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + + // Use special DAG nodes to represent the operation of extending the + // low lanes. + switch (N->getOpcode()) { + default: + llvm_unreachable("Extend legalization on on extend operation!"); + case ISD::ANY_EXTEND: + return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + case ISD::SIGN_EXTEND: + return DAG.getSignExtendVectorInReg(InOp, DL, VT); + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + } +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + InOp = GetWidenedVector(InOp); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, TLI.getVectorIdxTy()))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + EVT InWidenVT = InOp.getValueType(); + SDLoc dl(N); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + // x86mmx is not an acceptable vector element type, so don't try. + if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { + unsigned NewNumElts = InWidenSize / Size; + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } + } + + return CreateStackStoreLoad(InOp, VT); +} + +SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { + // If the input vector is not legal, it is likely that we will not find a + // legal vector of the same size. Replace the concatenate vector with a + // nasty build vector. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector Ops(NumElts); + + EVT InVT = N->getOperand(0).getValueType(); + unsigned NumInElts = InVT.getVectorNumElements(); + + unsigned Idx = 0; + unsigned NumOperands = N->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, TLI.getVectorIdxTy())); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { + // We have to widen the value but we want only to store the original + // vector type. + StoreSDNode *ST = cast(N); + + SmallVector StChain; + if (ST->isTruncatingStore()) + GenWidenVectorTruncStores(StChain, ST); + else + GenWidenVectorStores(StChain, ST); + + if (StChain.size() == 1) + return StChain[0]; + else + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); +} + +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { + SDValue InOp0 = GetWidenedVector(N->getOperand(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDLoc dl(N); + + // WARNING: In this code we widen the compare instruction with garbage. + // This garbage may contain denormal floats which may be slow. Is this a real + // concern ? Should we zero the unused lanes if this is a float compare ? + + // Get a new SETCC node to compare the newly widened operands. + // Only some of the compared elements are legal. + EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), + SVT, InOp0, InOp1, N->getOperand(2)); + + // Extract the needed results from the result vector. + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + SVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + ResVT, WideSETCC, DAG.getConstant(0, + TLI.getVectorIdxTy())); + + return PromoteTargetBoolean(CC, N->getValueType(0)); +} + + +//===----------------------------------------------------------------------===// +// Vector Widening Utilities +//===----------------------------------------------------------------------===// + +// Utility function to find the type to chop up a widen vector for load/store +// TLI: Target lowering used to determine legal types. +// Width: Width left need to load/store. +// WidenVT: The widen vector type to load to/store from +// Align: If 0, don't allow use of a wider type +// WidenEx: If Align is not 0, the amount additional we can load/store from. + +static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { + EVT WidenEltVT = WidenVT.getVectorElementType(); + unsigned WidenWidth = WidenVT.getSizeInBits(); + unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); + unsigned AlignInBits = Align*8; + + // If we have one element to load/store, return it. + EVT RetVT = WidenEltVT; + if (Width == WidenEltWidth) + return RetVT; + + // See if there is larger legal integer than the element type to load/store + unsigned VT; + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + RetVT = MemVT; + break; + } + } + + // See if there is a larger vector type to load/store that has the same vector + // element type and is evenly divisible with the WidenVT. + for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; + VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { + EVT MemVT = (MVT::SimpleValueType) VT; + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) + return MemVT; + } + } + + return RetVT; +} + +// Builds a vector type from scalar loads +// VecTy: Resulting Vector type +// LDOps: Load operators to build a vector type +// [Start,End) the list of loads to use. +static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, + SmallVectorImpl &LdOps, + unsigned Start, unsigned End) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc dl(LdOps[Start]); + EVT LdTy = LdOps[Start].getValueType(); + unsigned Width = VecTy.getSizeInBits(); + unsigned NumElts = Width / LdTy.getSizeInBits(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); + + unsigned Idx = 1; + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); + + for (unsigned i = Start + 1; i != End; ++i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + NumElts = Width / NewLdTy.getSizeInBits(); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); + VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); + // Readjust position and vector position based on new load type + Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); + LdTy = NewLdTy; + } + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + } + return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); +} + +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, + LoadSDNode *LD) { + // The strategy assumes that we can efficiently load powers of two widths. + // The routines chops the vector into the largest vector loads with the same + // element type or scalar loads and then recombines it to the widen vector + // type. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + unsigned WidenWidth = WidenVT.getSizeInBits(); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + int LdWidth = LdVT.getSizeInBits(); + int WidthDiff = WidenWidth - LdWidth; // Difference + unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads + + // Find the vector type that can load from. + EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + int NewVTWidth = NewVT.getSizeInBits(); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), + isVolatile, isNonTemporal, isInvariant, Align, + AAInfo); + LdChain.push_back(LdOp.getValue(1)); + + // Check if we can load the element with one instruction + if (LdWidth <= NewVTWidth) { + if (!NewVT.isVector()) { + unsigned NumElts = WidenWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + if (NewVT == WidenVT) + return LdOp; + + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } + + // Load vector by using multiple loads from largest vector to scalar + SmallVector LdOps; + LdOps.push_back(LdOp); + + LdWidth -= NewVTWidth; + unsigned Offset = 0; + + while (LdWidth > 0) { + unsigned Increment = NewVTWidth / 8; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Increment, BasePtr.getValueType())); + + SDValue L; + if (LdWidth < NewVTWidth) { + // Our current type we are using is too large, find a better size + NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + NewVTWidth = NewVT.getSizeInBits(); + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment), + AAInfo); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment), + AAInfo); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + + + LdWidth -= NewVTWidth; + } + + // Build the vector from the loads operations + unsigned End = LdOps.size(); + if (!LdOps[0].getValueType().isVector()) + // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + + // If the load contains vectors, build the vector using concat vector. + // All of the vectors used to loads are power of 2 and the scalars load + // can be combined to make a power of 2 vector. + SmallVector ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First combine the scalar loads to a vector + if (!LdTy.isVector()) { + for (--i; i >= 0; --i) { + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; + } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + makeArrayRef(&ConcatOps[Idx], End - Idx)); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } + + if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(&ConcatOps[Idx], End - Idx)); + + // We need to fill the rest with undefs to build the vector + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + { + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); +} + +SDValue +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl &LdChain, + LoadSDNode *LD, + ISD::LoadExtType ExtType) { + // For extension loads, it may not be more efficient to chop up the vector + // and then extended it. Instead, we unroll the load and build a new vector. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, + LD->getPointerInfo(), + LdEltVT, isVolatile, isNonTemporal, isInvariant, + Align, AAInfo); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, + DAG.getConstant(Offset, + BasePtr.getValueType())); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, + LD->getPointerInfo().getWithOffset(Offset), LdEltVT, + isVolatile, isNonTemporal, isInvariant, Align, + AAInfo); + LdChain.push_back(Ops[i].getValue(1)); + } + + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + + +void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl &StChain, + StoreSDNode *ST) { + // The strategy assumes that we can efficiently store powers of two widths. + // The routines chops the vector into the largest vector stores with the same + // element type or scalar stores. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + SDLoc dl(ST); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + EVT ValVT = ValOp.getValueType(); + unsigned ValWidth = ValVT.getSizeInBits(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned ValEltWidth = ValEltVT.getSizeInBits(); + assert(StVT.getVectorElementType() == ValEltVT); + + int Idx = 0; // current index to store + unsigned Offset = 0; // offset from base to store + while (StWidth != 0) { + // Find the largest vector type we can store with + EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); + unsigned NewVTWidth = NewVT.getSizeInBits(); + unsigned Increment = NewVTWidth / 8; + if (NewVT.isVector()) { + unsigned NumVTElts = NewVT.getVectorNumElements(); + do { + SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getConstant(Idx, TLI.getVectorIdxTy())); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, + MinAlign(Align, Offset), AAInfo)); + StWidth -= NewVTWidth; + Offset += Increment; + Idx += NumVTElts; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Increment, BasePtr.getValueType())); + } while (StWidth != 0 && StWidth >= NewVTWidth); + } else { + // Cast the vector to the scalar type we can store + unsigned NumElts = ValWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); + // Readjust index position based on new vector type + Idx = Idx * ValEltWidth / NewVTWidth; + do { + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, + MinAlign(Align, Offset), AAInfo)); + StWidth -= NewVTWidth; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Increment, BasePtr.getValueType())); + } while (StWidth != 0 && StWidth >= NewVTWidth); + // Restore index back to be relative to the original widen element type + Idx = Idx * NewVTWidth / ValEltWidth; + } + } +} + +void +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl &StChain, + StoreSDNode *ST) { + // For extension loads, it may not be more efficient to truncate the vector + // and then store it. Instead, we extract each element and then store it. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + SDLoc dl(ST); + + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ValOp.getValueType(); + + // It must be true that we the widen vector type is bigger than where + // we need to store. + assert(StVT.isVector() && ValOp.getValueType().isVector()); + assert(StVT.bitsLT(ValOp.getValueType())); + + // For truncating stores, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. Instead, we unroll + // the store. + EVT StEltVT = StVT.getVectorElementType(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned Increment = ValEltVT.getSizeInBits() / 8; + unsigned NumElts = StVT.getVectorNumElements(); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo(), StEltVT, + isVolatile, isNonTemporal, Align, + AAInfo)); + unsigned Offset = Increment; + for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getConstant(Offset, + BasePtr.getValueType())); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, + ST->getPointerInfo().getWithOffset(Offset), + StEltVT, isVolatile, isNonTemporal, + MinAlign(Align, Offset), AAInfo)); + } +} + +/// Modifies a vector input (widen or narrows) to a vector of NVT. The +/// input vector must have the same element type as NVT. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { + // Note that InOp might have been widened so it might already have + // the right width or it might need be narrowed. + EVT InVT = InOp.getValueType(); + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + SDLoc dl(InOp); + + // Check if InOp already has the right width. + if (InVT == NVT) + return InOp; + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { + unsigned NumConcat = WidenNumElts / InNumElts; + SmallVector Ops(NumConcat); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); + } + + if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + + // Fall back to extract and build. + SmallVector Ops(WidenNumElts); + EVT EltVT = NVT.getVectorElementType(); + unsigned MinNumElts = std::min(WidenNumElts, InNumElts); + unsigned Idx; + for (Idx = 0; Idx < MinNumElts; ++Idx) + Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(Idx, TLI.getVectorIdxTy())); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for ( ; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); +} Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4917,6 +4917,60 @@ return SDValue(N, 0); } +SDValue +SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue Src0, + MachineMemOperand *MMO) { + + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, VTs, + VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Val }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, + VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -756,6 +756,8 @@ void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); + void visitMaskedLoad(const CallInst &I); + void visitMaskedStore(const CallInst &I); void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); void visitAtomicRMW(const AtomicRMWInst &I); void visitFence(const FenceInst &I); Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3613,6 +3613,70 @@ DAG.setRoot(StoreNode); } +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(1)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(1)); + SDValue Mask = getValue(I.getArgOperand(3)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue InChain = DAG.getRoot(); + if (AA->pointsToConstantMemory( + AliasAnalysis::Location(PtrOperand, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + InChain = DAG.getEntryNode(); + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO); + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + setValue(&I, Load); +} + void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDLoc dl = getCurSDLoc(); AtomicOrdering SuccessOrder = I.getSuccessOrdering(); @@ -4914,6 +4978,12 @@ return nullptr; } + case Intrinsic::masked_load: + visitMaskedLoad(I); + return nullptr; + case Intrinsic::masked_store: + visitMaskedStore(I); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case Intrinsic::x86_mmx_pslli_q: Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -269,6 +269,8 @@ // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; + case ISD::MLOAD: return "masked_load"; + case ISD::MSTORE: return "masked_store"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; Index: lib/IR/Function.cpp =================================================================== --- lib/IR/Function.cpp +++ lib/IR/Function.cpp @@ -537,7 +537,8 @@ IIT_ANYPTR = 26, IIT_V1 = 27, IIT_VARARG = 28, - IIT_HALF_VEC_ARG = 29 + IIT_HALF_VEC_ARG = 29, + IIT_SAME_VEC_WIDTH_ARG = 30 }; @@ -645,6 +646,12 @@ ArgInfo)); return; } + case IIT_SAME_VEC_WIDTH_ARG: { + unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::SameVecWidthArgument, + ArgInfo)); + return; + } case IIT_EMPTYSTRUCT: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0)); return; @@ -752,7 +759,14 @@ case IITDescriptor::HalfVecArgument: return VectorType::getHalfElementsVectorType(cast( Tys[D.getArgumentNumber()])); - } + case IITDescriptor::SameVecWidthArgument: + Type *EltTy = DecodeFixedType(Infos, Tys, Context); + Type *Ty = Tys[D.getArgumentNumber()]; + if (VectorType *VTy = dyn_cast(Ty)) { + return VectorType::get(EltTy, VTy->getNumElements()); + } + llvm_unreachable("unhandled"); + } llvm_unreachable("unhandled"); } Index: lib/IR/IRBuilder.cpp =================================================================== --- lib/IR/IRBuilder.cpp +++ lib/IR/IRBuilder.cpp @@ -183,3 +183,29 @@ return createCallHelper(FnAssume, Ops, this); } +/// Create a call to a Masked Load intrinsic. +/// Ops - an array of operands. +CallInst *IRBuilderBase::CreateMaskedLoad(ArrayRef Ops) { + // The only one overloaded type - the type of passthru value in this case + Type *DataTy = Ops[1]->getType(); + return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, DataTy); +} + +/// Create a call to a Masked Store intrinsic. +/// Ops - an array of operands. +CallInst *IRBuilderBase::CreateMaskedStore(ArrayRef Ops) { + // DataTy - type of the data to be stored - the only one overloaded type + Type *DataTy = Ops[1]->getType(); + return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, DataTy); +} + +/// Create a call to a Masked intrinsic, with given intrinsic Id, +/// an array of operands - Ops, and one overloaded type - DataTy +CallInst *IRBuilderBase::CreateMaskedIntrinsic(unsigned Id, + ArrayRef Ops, + Type *DataTy) { + Module *M = BB->getParent()->getParent(); + Type *OverloadedTypes[] = { DataTy }; + Value *TheFn = Intrinsic::getDeclaration(M, (Intrinsic::ID)Id, OverloadedTypes); + return createCallHelper(TheFn, Ops, this); +} Index: lib/IR/Verifier.cpp =================================================================== --- lib/IR/Verifier.cpp +++ lib/IR/Verifier.cpp @@ -2399,6 +2399,19 @@ !isa(ArgTys[D.getArgumentNumber()]) || VectorType::getHalfElementsVectorType( cast(ArgTys[D.getArgumentNumber()])) != Ty; + case IITDescriptor::SameVecWidthArgument: { + if (D.getArgumentNumber() >= ArgTys.size()) + return true; + VectorType * ReferenceType = + dyn_cast(ArgTys[D.getArgumentNumber()]); + VectorType *ThisArgType = dyn_cast(Ty); + if (!ThisArgType || !ReferenceType || + (ReferenceType->getVectorNumElements() != + ThisArgType->getVectorNumElements())) + return true; + return VerifyIntrinsicType(ThisArgType->getVectorElementType(), + Infos, ArgTys); + } } llvm_unreachable("unhandled"); } Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1320,13 +1320,21 @@ // Extract subvector is special because the value type // (result) is 128-bit but the source is 256-bit wide. - if (VT.is128BitVector()) + if (VT.is128BitVector()) { + if (VT.getScalarSizeInBits() >= 32) { + setOperationAction(ISD::MLOAD, VT, Custom); + setOperationAction(ISD::MSTORE, VT, Custom); + } setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - + } // Do not attempt to custom lower other non-256-bit vectors if (!VT.is256BitVector()) continue; + if (VT.getScalarSizeInBits() >= 32) { + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); + } setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); @@ -1493,9 +1501,13 @@ unsigned EltSize = VT.getVectorElementType().getSizeInBits(); // Extract subvector is special because the value type // (result) is 256/128-bit but the source is 512-bit wide. - if (VT.is128BitVector() || VT.is256BitVector()) + if (VT.is128BitVector() || VT.is256BitVector()) { setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); - + if ( EltSize >= 32) { + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); + } + } if (VT.getVectorElementType() == MVT::i1) setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); @@ -1511,6 +1523,8 @@ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); + setOperationAction(ISD::MLOAD, VT, Legal); + setOperationAction(ISD::MSTORE, VT, Legal); } } for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -2102,6 +2102,41 @@ (VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src)>; +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), + (VMOVUPSZmrk addr:$ptr, + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), + (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; + +def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; + +def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)), + (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; + +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)), + (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; + +def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)), + (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, + (bc_v16f32 (v16i32 immAllZerosV)))), + (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))), + (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, + (bc_v8f64 (v16i32 immAllZerosV)))), + (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))), + (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; + defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32", "16", "8", "4", SSEPackedInt, HasAVX512>, avx512_store_vl<0x7F, "vmovdqa32", "alignedstore", @@ -2176,6 +2211,46 @@ (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } +def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))), + (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)), + (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))), + (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>; + +def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, + (bc_v8i64 (v16i32 immAllZerosV)))), + (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>; + +def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))), + (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>; + +def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)), + (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>; + +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)), + (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>; + +// SKX replacement +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), + (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>; + +// KNL replacement +def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), + (VMOVDQU32Zmrk addr:$ptr, + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; + +def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), + (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz + (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; + + // Move Int Doubleword to Packed Double Int // def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src), Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -9260,6 +9260,61 @@ int_x86_avx2_maskstore_q, int_x86_avx2_maskstore_q_256>, VEX_W; +def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)), + (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>; + +def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)), + (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>; + +def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), + (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), + (bc_v8f32 (v8i32 immAllZerosV)))), + (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))), + (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr), + VR256:$mask)>; + +def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), + (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))), + (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))), + (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr), + VR256:$mask)>; + +def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)), + (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>; + +def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)), + (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>; + +def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)), + (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), + (v4f64 immAllZerosV))), + (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))), + (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr), + VR256:$mask)>; + +def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)), + (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), + (bc_v4i64 (v8i32 immAllZerosV)))), + (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>; + +def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))), + (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr), + VR256:$mask)>; + //===----------------------------------------------------------------------===// // Variable Bit Shifts Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -111,6 +111,8 @@ Type *Ty) const override; unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty) const override; + bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const; + bool isLegalPredicatedStore(Type *DataType, int Consecutive) const; /// @} }; @@ -1156,3 +1158,18 @@ } return X86TTI::getIntImmCost(Imm, Ty); } + +bool X86TTI::isLegalPredicatedLoad(Type *DataType, int Consecutive) const { + int ScalarWidth = DataType->getScalarSizeInBits(); + + // Todo: AVX512 allows gather/scatter, works with strided and random as well + if ((ScalarWidth < 32) || (Consecutive == 0)) + return false; + if (ST->hasAVX512() || ST->hasAVX2()) + return true; + return false; +} + +bool X86TTI::isLegalPredicatedStore(Type *DataType, int Consecutive) const { + return isLegalPredicatedLoad(DataType, Consecutive); +} \ No newline at end of file Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -580,9 +580,10 @@ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, DominatorTree *DT, TargetLibraryInfo *TLI, - AliasAnalysis *AA, Function *F) + AliasAnalysis *AA, Function *F, + const TargetTransformInfo *TTI) : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - DT(DT), TLI(TLI), AA(AA), TheFunction(F), Induction(nullptr), + DT(DT), TLI(TLI), AA(AA), TheFunction(F), TTI(TTI), Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) { } @@ -768,6 +769,15 @@ } SmallPtrSet::iterator strides_end() { return StrideSet.end(); } + bool canPredicateStore(Type *DataType, Value *Ptr) { + return TTI->isLegalPredicatedStore(DataType, isConsecutivePtr(Ptr)); + } + bool canPredicateLoad(Type *DataType, Value *Ptr) { + return TTI->isLegalPredicatedLoad(DataType, isConsecutivePtr(Ptr)); + } + bool setMaskedOp(const Instruction* I) { + return (MaskedOp.find(I) != MaskedOp.end()); + } private: /// Check if a single basic block loop is vectorizable. /// At this point we know that this is a loop with a constant trip count @@ -840,6 +850,8 @@ AliasAnalysis *AA; /// Parent function Function *TheFunction; + /// Target Transform Info + const TargetTransformInfo *TTI; // --- vectorization state --- // @@ -871,6 +883,10 @@ ValueToValueMap Strides; SmallPtrSet StrideSet; + + /// While vectorizing these instructions we have to generate a + /// call to an appropriate masked intrinsic + std::set MaskedOp; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1375,7 +1391,7 @@ } // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F); + LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1763,7 +1779,8 @@ unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy); unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF; - if (SI && Legal->blockNeedsPredication(SI->getParent())) + if (SI && Legal->blockNeedsPredication(SI->getParent()) && + !Legal->setMaskedOp(SI)) return scalarizeInstruction(Instr, true); if (ScalarAllocatedSize != VectorElementSize) @@ -1857,8 +1874,25 @@ Value *VecPtr = Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace)); - StoreInst *NewSI = - Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment); + + Instruction *NewSI; + if (Legal->setMaskedOp(SI)) { + Type *I8PtrTy = + Builder.getInt8PtrTy(PartPtr->getType()->getPointerAddressSpace()); + + Value *I8Ptr = Builder.CreateBitCast(PartPtr, I8PtrTy); + + VectorParts Cond = createEdgeMask(SI->getParent()->getSinglePredecessor(), + SI->getParent()); + SmallVector Ops; + Ops.push_back(I8Ptr); + Ops.push_back(StoredVal[Part]); + Ops.push_back(Builder.getInt32(Alignment)); + Ops.push_back(Cond[Part]); + NewSI = Builder.CreateMaskedStore(Ops); + } + else + NewSI = Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment); propagateMetadata(NewSI, SI); } return; @@ -1873,14 +1907,31 @@ if (Reverse) { // If the address is consecutive but reversed, then the - // wide store needs to start at the last vector element. + // wide load needs to start at the last vector element. PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF)); PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF)); } - Value *VecPtr = Builder.CreateBitCast(PartPtr, - DataTy->getPointerTo(AddressSpace)); - LoadInst *NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load"); + Instruction* NewLI; + if (Legal->setMaskedOp(LI)) { + Type *I8PtrTy = + Builder.getInt8PtrTy(PartPtr->getType()->getPointerAddressSpace()); + + Value *I8Ptr = Builder.CreateBitCast(PartPtr, I8PtrTy); + + VectorParts SrcMask = createBlockInMask(LI->getParent()); + SmallVector Ops; + Ops.push_back(I8Ptr); + Ops.push_back(UndefValue::get(DataTy)); + Ops.push_back(Builder.getInt32(Alignment)); + Ops.push_back(SrcMask[Part]); + NewLI = Builder.CreateMaskedLoad(Ops); + } + else { + Value *VecPtr = Builder.CreateBitCast(PartPtr, + DataTy->getPointerTo(AddressSpace)); + NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load"); + } propagateMetadata(NewLI, LI); Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI; } @@ -5303,8 +5354,15 @@ // We might be able to hoist the load. if (it->mayReadFromMemory()) { LoadInst *LI = dyn_cast(it); - if (!LI || !SafePtrs.count(LI->getPointerOperand())) + if (!LI) return false; + if (!SafePtrs.count(LI->getPointerOperand())) { + if (canPredicateLoad(LI->getType(), LI->getPointerOperand())) { + MaskedOp.insert(LI); + continue; + } + return false; + } } // We don't predicate stores at the moment. @@ -5312,10 +5370,20 @@ StoreInst *SI = dyn_cast(it); // We only support predication of stores in basic blocks with one // predecessor. - if (!SI || ++NumPredStores > NumberOfStoresToPredicate || + if (!SI) + return false; + + if (++NumPredStores > NumberOfStoresToPredicate || !SafePtrs.count(SI->getPointerOperand()) || - !SI->getParent()->getSinglePredecessor()) + !SI->getParent()->getSinglePredecessor()) { + if (canPredicateStore(SI->getValueOperand()->getType(), + SI->getPointerOperand())) { + MaskedOp.insert(SI); + --NumPredStores; + continue; + } return false; + } } if (it->mayThrow()) return false; @@ -5379,7 +5447,7 @@ MaxVectorSize = 1; } - assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements" + assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements" " into one vector!"); unsigned VF = MaxVectorSize; @@ -5440,7 +5508,7 @@ // the vector elements. float VectorCost = expectedCost(i) / (float)i; DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << - (int)VectorCost << ".\n"); + VectorCost << ".\n"); if (VectorCost < Cost) { Cost = VectorCost; Width = i; Index: test/CodeGen/X86/masked_memop.ll =================================================================== --- test/CodeGen/X86/masked_memop.ll +++ test/CodeGen/X86/masked_memop.ll @@ -0,0 +1,73 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=AVX512 +; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2 + +; AVX512-LABEL: test1 +; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z} + +; AVX2-LABEL: test1 +; AVX2: vpmaskmovd 32(%rdi) +; AVX2: vpmaskmovd (%rdi) +; AVX2-NOT: blend + +define <16 x i32> @test1(<16 x i32> %trigger, i8* %addr) { + %mask = icmp eq <16 x i32> %trigger, zeroinitializer + %res = call <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32>undef, i32 4, <16 x i1>%mask) + ret <16 x i32> %res +} + +; AVX512-LABEL: test2 +; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z} + +; AVX2-LABEL: test2 +; AVX2: vpmaskmovd {{.*}}(%rdi) +; AVX2: vpmaskmovd {{.*}}(%rdi) +; AVX2-NOT: blend +define <16 x i32> @test2(<16 x i32> %trigger, i8* %addr) { + %mask = icmp eq <16 x i32> %trigger, zeroinitializer + %res = call <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32>zeroinitializer, i32 4, <16 x i1>%mask) + ret <16 x i32> %res +} + +; AVX512-LABEL: test3 +; AVX512: vmovdqu32 %zmm1, (%rdi) {%k1} + +define void @test3(<16 x i32> %trigger, i8* %addr, <16 x i32> %val) { + %mask = icmp eq <16 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v16i32(i8* %addr, <16 x i32>%val, i32 4, <16 x i1>%mask) + ret void +} + +; AVX512-LABEL: test4 +; AVX512: vmovups (%rdi), %zmm{{.*{%k[1-7]}}} + +; AVX2-LABEL: test4 +; AVX2: vpmaskmovd {{.*}}(%rdi) +; AVX2: vpmaskmovd {{.*}}(%rdi) +; AVX2: blend +define <16 x float> @test4(<16 x i32> %trigger, i8* %addr, <16 x float> %dst) { + %mask = icmp eq <16 x i32> %trigger, zeroinitializer + %res = call <16 x float> @llvm.masked.load.v16f32(i8* %addr, <16 x float>%dst, i32 4, <16 x i1>%mask) + ret <16 x float> %res +} + +; AVX512-LABEL: test5 +; AVX512: vmovupd (%rdi), %zmm1 {%k1} + +; AVX2-LABEL: test5 +; AVX2: vpmaskmovq +; AVX2: vblendvpd +; AVX2: vpmaskmovq +; AVX2: vblendvpd +define <8 x double> @test5(<8 x i32> %trigger, i8* %addr, <8 x double> %dst) { + %mask = icmp eq <8 x i32> %trigger, zeroinitializer + %res = call <8 x double> @llvm.masked.load.v8f64(i8* %addr, <8 x double>%dst, i32 4, <8 x i1>%mask) + ret <8 x double> %res +} + + +declare <16 x i32> @llvm.masked.load.v16i32(i8*, <16 x i32>, i32, <16 x i1>) +declare void @llvm.masked.store.v16i32(i8*, <16 x i32>, i32, <16 x i1>) +declare <16 x float> @llvm.masked.load.v16f32(i8*, <16 x float>, i32, <16 x i1>) +declare void @llvm.masked.store.v16f32(i8*, <16 x float>, i32, <16 x i1>) +declare <8 x double> @llvm.masked.load.v8f64(i8*, <8 x double>, i32, <8 x i1>) +declare void @llvm.masked.store.v8f64(i8*, <8 x double>, i32, <8 x i1>) \ No newline at end of file Index: test/Transforms/LoopVectorize/X86/mask1.ll =================================================================== --- test/Transforms/LoopVectorize/X86/mask1.ll +++ test/Transforms/LoopVectorize/X86/mask1.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1 +; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2 +; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc_linux" + +; The source code: +; +;void foo(int *A, int *B, int *trigger) { +; +; for (int i=0; i<10000; i++) { +; if (trigger[i] < 100) { +; A[i] = B[i] + trigger[i]; +; } +; } +;} + + +;AVX2: llvm.masked.load.v8i32 +;AVX2: llvm.masked.store.v8i32 +;AVX512: llvm.masked.load.v16i32 +;AVX512: llvm.masked.store.v16i32 +;AVX1-NOT: llvm.masked + +; Function Attrs: nounwind uwtable +define void @foo(i32* %A, i32* %B, i32* %trigger) { +entry: + %A.addr = alloca i32*, align 8 + %B.addr = alloca i32*, align 8 + %trigger.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store i32* %A, i32** %A.addr, align 8 + store i32* %B, i32** %B.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 10000 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom + %3 = load i32* %arrayidx, align 4 + %cmp1 = icmp slt i32 %3, 100 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32* %i, align 4 + %idxprom2 = sext i32 %4 to i64 + %5 = load i32** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2 + %6 = load i32* %arrayidx3, align 4 + %7 = load i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 + %9 = load i32* %arrayidx5, align 4 + %add = add nsw i32 %6, %9 + %10 = load i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %11 = load i32** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6 + store i32 %add, i32* %arrayidx7, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %12 = load i32* %i, align 4 + %inc = add nsw i32 %12, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Transforms/LoopVectorize/X86/mask2.ll =================================================================== --- test/Transforms/LoopVectorize/X86/mask2.ll +++ test/Transforms/LoopVectorize/X86/mask2.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1 +; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2 +; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc_linux" + +; The source code: +; +;void foo(float *A, float *B, int *trigger) { +; +; for (int i=0; i<10000; i++) { +; if (trigger[i] < 100) { +; A[i] = B[i] + trigger[i]; +; } +; } +;} + + +;AVX2: llvm.masked.load.v8f32 +;AVX2: llvm.masked.store.v8f32 +;AVX512: llvm.masked.load.v16f32 +;AVX512: llvm.masked.store.v16f32 +;AVX1-NOT: llvm.masked + +; Function Attrs: nounwind uwtable +define void @foo(float* %A, float* %B, i32* %trigger) { +entry: + %A.addr = alloca float*, align 8 + %B.addr = alloca float*, align 8 + %trigger.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store float* %A, float** %A.addr, align 8 + store float* %B, float** %B.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 10000 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom + %3 = load i32* %arrayidx, align 4 + %cmp1 = icmp slt i32 %3, 100 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32* %i, align 4 + %idxprom2 = sext i32 %4 to i64 + %5 = load float** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds float* %5, i64 %idxprom2 + %6 = load float* %arrayidx3, align 4 + %7 = load i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 + %9 = load i32* %arrayidx5, align 4 + %conv = sitofp i32 %9 to float + %add = fadd float %6, %conv + %10 = load i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %11 = load float** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds float* %11, i64 %idxprom6 + store float %add, float* %arrayidx7, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %12 = load i32* %i, align 4 + %inc = add nsw i32 %12, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Transforms/LoopVectorize/X86/mask3.ll =================================================================== --- test/Transforms/LoopVectorize/X86/mask3.ll +++ test/Transforms/LoopVectorize/X86/mask3.ll @@ -0,0 +1,84 @@ +; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1 +; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2 +; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc_linux" + +; The source code: +; +;void foo(double *A, double *B, int *trigger) { +; +; for (int i=0; i<10000; i++) { +; if (trigger[i] < 100) { +; A[i] = B[i] + trigger[i]; +; } +; } +;} + + +;AVX2: llvm.masked.load.v4f64 +;AVX2: llvm.masked.store.v4f64 +;AVX512: llvm.masked.load.v8f64 +;AVX512: llvm.masked.store.v8f64 +;AVX1-NOT: llvm.masked + +; Function Attrs: nounwind uwtable +define void @foo(double* %A, double* %B, i32* %trigger) #0 { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %trigger.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 10000 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom + %3 = load i32* %arrayidx, align 4 + %cmp1 = icmp slt i32 %3, 100 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32* %i, align 4 + %idxprom2 = sext i32 %4 to i64 + %5 = load double** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2 + %6 = load double* %arrayidx3, align 8 + %7 = load i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 + %9 = load i32* %arrayidx5, align 4 + %conv = sitofp i32 %9 to double + %add = fadd double %6, %conv + %10 = load i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %11 = load double** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6 + store double %add, double* %arrayidx7, align 8 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %12 = load i32* %i, align 4 + %inc = add nsw i32 %12, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Transforms/LoopVectorize/X86/mask4.ll =================================================================== --- test/Transforms/LoopVectorize/X86/mask4.ll +++ test/Transforms/LoopVectorize/X86/mask4.ll @@ -0,0 +1,83 @@ +; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1 +; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2 +; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc_linux" + +; The source code: +; +;void foo(double *A, double *B, int *trigger) { +; +; for (int i=0; i<10000; i++) { +; if (trigger[i] < 100) { +; A[i] = B[i*2] + trigger[i]; << non-cosecutive access +; } +; } +;} + + +;AVX2-NOT: llvm.masked +;AVX512-NOT: llvm.masked +;AVX1-NOT: llvm.masked + +; Function Attrs: nounwind uwtable +define void @foo(double* %A, double* %B, i32* %trigger) { +entry: + %A.addr = alloca double*, align 8 + %B.addr = alloca double*, align 8 + %trigger.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store double* %A, double** %A.addr, align 8 + store double* %B, double** %B.addr, align 8 + store i32* %trigger, i32** %trigger.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32* %i, align 4 + %cmp = icmp slt i32 %0, 10000 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %1 = load i32* %i, align 4 + %idxprom = sext i32 %1 to i64 + %2 = load i32** %trigger.addr, align 8 + %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom + %3 = load i32* %arrayidx, align 4 + %cmp1 = icmp slt i32 %3, 100 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %4 = load i32* %i, align 4 + %mul = mul nsw i32 %4, 2 + %idxprom2 = sext i32 %mul to i64 + %5 = load double** %B.addr, align 8 + %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2 + %6 = load double* %arrayidx3, align 8 + %7 = load i32* %i, align 4 + %idxprom4 = sext i32 %7 to i64 + %8 = load i32** %trigger.addr, align 8 + %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4 + %9 = load i32* %arrayidx5, align 4 + %conv = sitofp i32 %9 to double + %add = fadd double %6, %conv + %10 = load i32* %i, align 4 + %idxprom6 = sext i32 %10 to i64 + %11 = load double** %A.addr, align 8 + %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6 + store double %add, double* %arrayidx7, align 8 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %12 = load i32* %i, align 4 + %inc = add nsw i32 %12, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: utils/TableGen/CodeGenTarget.cpp =================================================================== --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -539,7 +539,8 @@ // variants with iAny types; otherwise, if the intrinsic is not // overloaded, all the types can be specified directly. assert(((!TyEl->isSubClassOf("LLVMExtendedType") && - !TyEl->isSubClassOf("LLVMTruncatedType")) || + !TyEl->isSubClassOf("LLVMTruncatedType") && + !TyEl->isSubClassOf("LLVMVectorSameWidth")) || VT == MVT::iAny || VT == MVT::vAny) && "Expected iAny or vAny type"); } else Index: utils/TableGen/IntrinsicEmitter.cpp =================================================================== --- utils/TableGen/IntrinsicEmitter.cpp +++ utils/TableGen/IntrinsicEmitter.cpp @@ -257,7 +257,8 @@ IIT_ANYPTR = 26, IIT_V1 = 27, IIT_VARARG = 28, - IIT_HALF_VEC_ARG = 29 + IIT_HALF_VEC_ARG = 29, + IIT_SAME_VEC_WIDTH_ARG = 30 }; @@ -305,6 +306,13 @@ Sig.push_back(IIT_TRUNC_ARG); else if (R->isSubClassOf("LLVMHalfElementsVectorType")) Sig.push_back(IIT_HALF_VEC_ARG); + else if (R->isSubClassOf("LLVMVectorSameWidth")) { + Sig.push_back(IIT_SAME_VEC_WIDTH_ARG); + Sig.push_back((Number << 2) | ArgCodes[Number]); + MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy")); + EncodeFixedValueType(VT, Sig); + return; + } else Sig.push_back(IIT_ARG); return Sig.push_back((Number << 2) | ArgCodes[Number]);