diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -88,6 +88,22 @@
 /// } AVL Functions

+/// Node Properties {
+
+Optional<EVT> getIdiomaticVectorType(SDNode *Op);
+
+SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG);
+
+SDValue getMemoryPtr(SDValue Op);
+
+SDValue getNodeChain(SDValue Op);
+
+SDValue getStoredValue(SDValue Op);
+
+SDValue getNodePassthru(SDValue Op);
+
+/// } Node Properties
+
 enum class Packing {
   Normal = 0, // 256 element standard mode.
   Dense = 1   // 512 element packed mode.
@@ -157,6 +173,10 @@
   SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL) const;
   /// } Packing

+  SDValue getMergeValues(ArrayRef<SDValue> Values) const {
+    return DAG.getMergeValues(Values, DL);
+  }
+
   SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
                       bool IsOpaque = false) const;
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -61,6 +61,10 @@
 /// \returns the VVP_* SDNode opcode corresponsing to \p OC.
 Optional<unsigned> getVVPOpcode(unsigned Opcode) {
   switch (Opcode) {
+  case ISD::MLOAD:
+    return VEISD::VVP_LOAD;
+  case ISD::MSTORE:
+    return VEISD::VVP_STORE;
 #define HANDLE_VP_TO_VVP(VPOPC, VVPNAME)                                       \
   case ISD::VPOPC:                                                             \
     return VEISD::VVPNAME;
@@ -166,8 +170,12 @@
   if (isVVPBinaryOp(Opc))
     return 2;

-  // VM Opcodes.
+  // Other opcodes.
   switch (Opc) {
+  case ISD::MSTORE:
+    return 4;
+  case ISD::MLOAD:
+    return 3;
   case VEISD::VVP_SELECT:
     return 2;
   }
@@ -177,6 +185,116 @@
 bool isLegalAVL(SDValue AVL) { return AVL->getOpcode() == VEISD::LEGALAVL; }

+/// Node Properties {
+
+SDValue getNodeChain(SDValue Op) {
+  if (MemSDNode *MemN = dyn_cast<MemSDNode>(Op.getNode()))
+    return MemN->getChain();
+
+  switch (Op->getOpcode()) {
+  case VEISD::VVP_LOAD:
+  case VEISD::VVP_STORE:
+    return Op->getOperand(0);
+  }
+  return SDValue();
+}
+
+SDValue getMemoryPtr(SDValue Op) {
+  if (auto *MemN = dyn_cast<MemSDNode>(Op.getNode()))
+    return MemN->getBasePtr();
+
+  switch (Op->getOpcode()) {
+  case VEISD::VVP_LOAD:
+    return Op->getOperand(1);
+  case VEISD::VVP_STORE:
+    return Op->getOperand(2);
+  }
+  return SDValue();
+}
+
+Optional<EVT> getIdiomaticVectorType(SDNode *Op) {
+  unsigned OC = Op->getOpcode();
+
+  // For memory ops -> the transferred data type
+  if (auto MemN = dyn_cast<MemSDNode>(Op))
+    return MemN->getMemoryVT();
+
+  switch (OC) {
+  // Standard ISD.
+  case ISD::SELECT: // not aliased with VVP_SELECT
+  case ISD::CONCAT_VECTORS:
+  case ISD::EXTRACT_SUBVECTOR:
+  case ISD::VECTOR_SHUFFLE:
+  case ISD::BUILD_VECTOR:
+  case ISD::SCALAR_TO_VECTOR:
+    return Op->getValueType(0);
+  }
+
+  // Translate to VVP where possible.
+  if (auto VVPOpc = getVVPOpcode(OC))
+    OC = *VVPOpc;
+
+  switch (OC) {
+  default:
+  case VEISD::VVP_SETCC:
+    return Op->getOperand(0).getValueType();
+
+  case VEISD::VVP_SELECT:
+#define ADD_BINARY_VVP_OP(VVP_NAME, ...)                                       \
+  case VEISD::VVP_NAME:
+#include "VVPNodes.def"
+    return Op->getValueType(0);
+
+  case VEISD::VVP_LOAD:
+    return Op->getValueType(0);
+
+  case VEISD::VVP_STORE:
+    return Op->getOperand(1)->getValueType(0);
+
+  // VEC
+  case VEISD::VEC_BROADCAST:
+    return Op->getValueType(0);
+  }
+}
+
+SDValue getLoadStoreStride(SDValue Op, VECustomDAG &CDAG) {
+  if (Op->getOpcode() == VEISD::VVP_STORE)
+    return Op->getOperand(3);
+  if (Op->getOpcode() == VEISD::VVP_LOAD)
+    return Op->getOperand(2);
+
+  if (isa<MemSDNode>(Op.getNode())) {
+    // Regular MLOAD/MSTORE/LOAD/STORE
+    // No stride argument -> use the contiguous element size as stride.
+    uint64_t ElemStride = getIdiomaticVectorType(Op.getNode())
+                              ->getVectorElementType()
+                              .getStoreSize();
+    return CDAG.getConstant(ElemStride, MVT::i64);
+  }
+  return SDValue();
+}
+
+SDValue getStoredValue(SDValue Op) {
+  switch (Op->getOpcode()) {
+  case VEISD::VVP_STORE:
+    return Op->getOperand(1);
+  }
+  if (auto *StoreN = dyn_cast<StoreSDNode>(Op.getNode()))
+    return StoreN->getValue();
+  if (auto *StoreN = dyn_cast<MaskedStoreSDNode>(Op.getNode()))
+    return StoreN->getValue();
+  if (auto *StoreN = dyn_cast<VPStoreSDNode>(Op.getNode()))
+    return StoreN->getValue();
+  return SDValue();
+}
+
+SDValue getNodePassthru(SDValue Op) {
+  if (auto *N = dyn_cast<MaskedLoadSDNode>(Op.getNode()))
+    return N->getPassThru();
+  return SDValue();
+}
+
+/// } Node Properties
+
 SDValue getNodeAVL(SDValue Op) {
   auto PosOpt = getAVLPos(Op->getOpcode());
   return PosOpt ? Op->getOperand(*PosOpt) : SDValue();
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -186,6 +186,8 @@
   /// VVP Lowering {
   SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVVP_LOAD_STORE(SDValue Op, VECustomDAG&) const;
+
   SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue splitVectorOp(SDValue Op, VECustomDAG &CDAG) const;
   SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -322,6 +322,17 @@
     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
   }
+
+  // vNt32, vNt64 ops (legal element types)
+  for (MVT VT : MVT::vector_valuetypes()) {
+    MVT ElemVT = VT.getVectorElementType();
+    unsigned ElemBits = ElemVT.getScalarSizeInBits();
+    if (ElemBits != 32 && ElemBits != 64)
+      continue;
+
+    for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
+      setOperationAction(MemOpc, VT, Custom);
+  }
 }

 SDValue
@@ -1321,6 +1332,12 @@
 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
+  EVT MemVT = LdNode->getMemoryVT();
+
+  // Dispatch to vector isel.
+  if (MemVT.isVector() && !isMaskType(MemVT))
+    return lowerToVVP(Op, DAG);
+
   SDValue BasePtr = LdNode->getBasePtr();
   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
     // Do not expand store instruction with frame index here because of
@@ -1328,7 +1345,6 @@
     return Op;
   }

-  EVT MemVT = LdNode->getMemoryVT();
   if (MemVT == MVT::f128)
     return lowerLoadF128(Op, DAG);

@@ -1375,6 +1391,11 @@
   StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

+  // Always lower non-mask vector stores to VVP.
+  EVT MemVT = StNode->getMemoryVT();
+  if (MemVT.isVector() && !isMaskType(MemVT))
+    return lowerToVVP(Op, DAG);
+
   SDValue BasePtr = StNode->getBasePtr();
   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
     // Do not expand store instruction with frame index here because of
@@ -1382,7 +1403,6 @@
     return Op;
   }

-  EVT MemVT = StNode->getMemoryVT();
   if (MemVT == MVT::f128)
     return lowerStoreF128(Op, DAG);

@@ -1699,12 +1719,9 @@
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
   unsigned Opcode = Op.getOpcode();
-  if (ISD::isVPOpcode(Opcode))
-    return lowerToVVP(Op, DAG);

+  /// Scalar isel.
   switch (Opcode) {
-  default:
-    llvm_unreachable("Should not custom lower this!");
   case ISD::ATOMIC_FENCE:
     return lowerATOMIC_FENCE(Op, DAG);
   case ISD::ATOMIC_SWAP:
@@ -1748,6 +1765,16 @@
     return lowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+  }
+
+  /// Vector isel.
+  LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
+  if (ISD::isVPOpcode(Opcode))
+    return lowerToVVP(Op, DAG);
+
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Should not custom lower this!");

   // Legalize the AVL of this internal node.
   case VEISD::VEC_BROADCAST:
@@ -1759,6 +1786,8 @@
     return legalizeInternalVectorOp(Op, DAG);

   // Translate into a VEC_*/VVP_* layer operation.
+  case ISD::MLOAD:
+  case ISD::MSTORE:
 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
 #include "VVPNodes.def"
     if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
diff --git a/llvm/lib/Target/VE/VETargetTransformInfo.h b/llvm/lib/Target/VE/VETargetTransformInfo.h
--- a/llvm/lib/Target/VE/VETargetTransformInfo.h
+++ b/llvm/lib/Target/VE/VETargetTransformInfo.h
@@ -21,6 +21,32 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"

+static llvm::Type *getVectorElementType(llvm::Type *Ty) {
+  return llvm::cast<llvm::VectorType>(Ty)->getElementType();
+}
+
+static llvm::Type *getLaneType(llvm::Type *Ty) {
+  using namespace llvm;
+  if (!isa<VectorType>(Ty))
+    return Ty;
+  return getVectorElementType(Ty);
+}
+
+static bool isVectorLaneType(llvm::Type &ElemTy) {
+  // check element sizes for vregs
+  if (ElemTy.isIntegerTy()) {
+    unsigned ScaBits = ElemTy.getScalarSizeInBits();
+    return ScaBits == 1 || ScaBits == 32 || ScaBits == 64;
+  }
+  if (ElemTy.isPointerTy()) {
+    return true;
+  }
+  if (ElemTy.isFloatTy() || ElemTy.isDoubleTy()) {
+    return true;
+  }
+  return false;
+}
+
 namespace llvm {

 class VETTIImpl : public BasicTTIImplBase<VETTIImpl> {
@@ -86,6 +112,21 @@
     // output
     return false;
   }
+
+  // Load & Store {
+  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
+    return isVectorLaneType(*getLaneType(DataType));
+  }
+  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
+    return isVectorLaneType(*getLaneType(DataType));
+  }
+  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
+    return isVectorLaneType(*getLaneType(DataType));
+  };
+  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
+    return isVectorLaneType(*getLaneType(DataType));
+  }
+  // } Load & Store
 };

 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp
--- a/llvm/lib/Target/VE/VVPISelLowering.cpp
+++ b/llvm/lib/Target/VE/VVPISelLowering.cpp
@@ -46,6 +46,13 @@
   // The representative and legalized vector type of this operation.
   VECustomDAG CDAG(DAG, Op);

+  // Dispatch to complex lowering functions.
+  switch (VVPOpcode) {
+  case VEISD::VVP_LOAD:
+  case VEISD::VVP_STORE:
+    return lowerVVP_LOAD_STORE(Op, CDAG);
+  };
+
   EVT OpVecVT = Op.getValueType();
   EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
   auto Packing = getTypePacking(LegalVecVT.getSimpleVT());
@@ -89,6 +96,60 @@
   llvm_unreachable("lowerToVVP called for unexpected SDNode.");
 }

+SDValue VETargetLowering::lowerVVP_LOAD_STORE(SDValue Op,
+                                              VECustomDAG &CDAG) const {
+  auto VVPOpc = *getVVPOpcode(Op->getOpcode());
+  const bool IsLoad = (VVPOpc == VEISD::VVP_LOAD);
+
+  // Shared operands.
+  SDValue BasePtr = getMemoryPtr(Op);
+  SDValue Mask = getNodeMask(Op);
+  SDValue Chain = getNodeChain(Op);
+  SDValue AVL = getNodeAVL(Op);
+  // Store specific.
+  SDValue Data = getStoredValue(Op);
+  // Load specific.
+  SDValue PassThru = getNodePassthru(Op);
+
+  auto DataVT = *getIdiomaticVectorType(Op.getNode());
+  auto Packing = getTypePacking(DataVT);
+
+  assert(Packing == Packing::Normal && "TODO Packed load store isel");
+
+  // TODO: Infer lower AVL from mask.
+  if (!AVL)
+    AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32);
+
+  // Default to the all-true mask.
+  if (!Mask)
+    Mask = CDAG.getConstantMask(Packing, true);
+
+  SDValue StrideV = getLoadStoreStride(Op, CDAG);
+  if (IsLoad) {
+    MVT LegalDataVT = getLegalVectorType(
+        Packing, DataVT.getVectorElementType().getSimpleVT());
+
+    auto NewLoadV = CDAG.getNode(VEISD::VVP_LOAD, {LegalDataVT, MVT::Other},
+                                 {Chain, BasePtr, StrideV, Mask, AVL});
+
+    if (!PassThru || PassThru->isUndef())
+      return NewLoadV;
+
+    // Convert passthru to an explicit select node.
+    SDValue DataV = CDAG.getNode(VEISD::VVP_SELECT, DataVT,
+                                 {NewLoadV, PassThru, Mask, AVL});
+    SDValue NewLoadChainV = SDValue(NewLoadV.getNode(), 1);
+
+    // Merge them back into one node.
+    return CDAG.getMergeValues({DataV, NewLoadChainV});
+  }
+
+  // VVP_STORE
+  assert(VVPOpc == VEISD::VVP_STORE);
+  return CDAG.getNode(VEISD::VVP_STORE, Op.getNode()->getVTList(),
+                      {Chain, Data, BasePtr, StrideV, Mask, AVL});
+}
+
 SDValue VETargetLowering::legalizeInternalVectorOp(SDValue Op,
                                                    SelectionDAG &DAG) const {
   VECustomDAG CDAG(DAG, Op);
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -18,6 +18,24 @@
 // TODO explain how VVP nodes relate to VP SDNodes once VP ISel is uptream.
 //===----------------------------------------------------------------------===//

+// vvp_load(ptr, stride, mask, avl)
+def SDTLoadVVP : SDTypeProfile<1, 4, [
+  SDTCisVec<0>,
+  SDTCisPtrTy<1>,
+  SDTCisInt<2>,
+  SDTCisVec<3>,
+  IsVLVT<4>
+]>;
+
+// vvp_store(data, ptr, stride, mask, avl)
+def SDTStoreVVP: SDTypeProfile<0, 5, [
+  SDTCisVec<0>,
+  SDTCisPtrTy<1>,
+  SDTCisInt<2>,
+  SDTCisVec<3>,
+  IsVLVT<4>
+]>;
+
 // Binary Operators {

 // BinaryOp(x,y,mask,vl)
@@ -102,6 +120,12 @@

 // } Binary Operators

+def vvp_load  : SDNode<"VEISD::VVP_LOAD", SDTLoadVVP,
+                       [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def vvp_store : SDNode<"VEISD::VVP_STORE", SDTStoreVVP,
+                       [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+
 def vvp_select : SDNode<"VEISD::VVP_SELECT", SDTSelectVVP>;

 // setcc (lhs, rhs, cc, mask, vl)
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -17,6 +17,85 @@
 //===----------------------------------------------------------------------===//

 include "VVPInstrInfo.td"

+multiclass VectorStore<ValueType DataVT,
+    ValueType PtrVT, ValueType MaskVT,
+    string STWithMask, string STNoMask> {
+  // Unmasked (imm stride).
+  def : Pat<(vvp_store
+                DataVT:$val, PtrVT:$addr,
+                (i64 simm7:$stride), (MaskVT true_mask), i32:$avl),
+            (!cast<Instruction>(STNoMask#"irvl")
+                (LO7 $stride), $addr, $val, $avl)>;
+  // Unmasked.
+  def : Pat<(vvp_store
+                DataVT:$val, PtrVT:$addr,
+                i64:$stride, (MaskVT true_mask), i32:$avl),
+            (!cast<Instruction>(STNoMask#"rrvl")
+                $stride, $addr, $val, $avl)>;
+  // Masked (imm stride).
+  def : Pat<(vvp_store
+                DataVT:$val, PtrVT:$addr,
+                (i64 simm7:$stride), MaskVT:$mask, i32:$avl),
+            (!cast<Instruction>(STWithMask#"irvml")
+                (LO7 $stride), $addr, $val, $mask, $avl)>;
+  // Masked.
+  def : Pat<(vvp_store
+                DataVT:$val, PtrVT:$addr,
+                i64:$stride, MaskVT:$mask, i32:$avl),
+            (!cast<Instruction>(STWithMask#"rrvml")
+                $stride, $addr, $val, $mask, $avl)>;
+}
+
+defm : VectorStore<v256f64, i64, v256i1, "VSTNC", "VST">;
+defm : VectorStore<v256i64, i64, v256i1, "VSTNC", "VST">;
+defm : VectorStore<v256f32, i64, v256i1, "VSTUNC", "VSTU">;
+defm : VectorStore<v256i32, i64, v256i1, "VSTLNC", "VSTL">;
+
+multiclass VectorLoad<ValueType DataVT,
+    ValueType PtrVT, ValueType MaskVT,
+    string GTWithMask, string LDNoMask> {
+  // Unmasked (imm stride).
+  def : Pat<(DataVT (vvp_load
+                PtrVT:$addr, (i64 simm7:$stride),
+                (MaskVT true_mask), i32:$avl)),
+            (!cast<Instruction>(LDNoMask#"irl")
+                (LO7 $stride), $addr, $avl)>;
+  // Unmasked.
+  def : Pat<(DataVT (vvp_load
+                PtrVT:$addr, i64:$stride,
+                (MaskVT true_mask), i32:$avl)),
+            (!cast<Instruction>(LDNoMask#"rrl")
+                $stride, PtrVT:$addr, $avl)>;
+  // Masked (imm stride).
+  def : Pat<(DataVT (vvp_load
+                PtrVT:$addr, (i64 simm7:$stride),
+                MaskVT:$mask, i32:$avl)),
+            (!cast<Instruction>(GTWithMask#"vizml")
+                (VADDULrvml $addr,
+                  (VMULULivml (LO7 $stride), (VSEQl $avl), $mask, $avl),
+                  $mask, $avl),
+                0, 0,
+                $mask,
+                $avl)>;
+  // Masked.
+  def : Pat<(DataVT (vvp_load
+                PtrVT:$addr, i64:$stride, MaskVT:$mask, i32:$avl)),
+            (!cast<Instruction>(GTWithMask#"vizml")
+                (VADDULrvml $addr,
+                  (VMULULrvml $stride, (VSEQl $avl), $mask, $avl),
+                  $mask, $avl),
+                0, 0,
+                $mask,
+                $avl)>;
+}
+
+defm : VectorLoad<v256f64, i64, v256i1, "VGT", "VLD">;
+defm : VectorLoad<v256i64, i64, v256i1, "VGT", "VLD">;
+defm : VectorLoad<v256f32, i64, v256i1, "VGTU", "VLDU">;
+defm : VectorLoad<v256i32, i64, v256i1, "VGTLZX", "VLDLZX">;
+
+
 multiclass Binary_rv<SDPatternOperator OpNode,
     ValueType ScalarVT, ValueType DataVT,
     ValueType MaskVT, string OpBaseName> {
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -44,6 +44,9 @@
 #define REGISTER_PACKED(OPC)
 #endif

+ADD_VVP_OP(VVP_LOAD,LOAD) HANDLE_VP_TO_VVP(VP_LOAD, VVP_LOAD) REGISTER_PACKED(VVP_LOAD)
+ADD_VVP_OP(VVP_STORE,STORE) HANDLE_VP_TO_VVP(VP_STORE, VVP_STORE) REGISTER_PACKED(VVP_STORE)
+
 // Integer arithmetic.
 ADD_BINARY_VVP_OP_COMPACT(ADD) REGISTER_PACKED(VVP_ADD)
 ADD_BINARY_VVP_OP_COMPACT(SUB) REGISTER_PACKED(VVP_SUB)
diff --git a/llvm/test/CodeGen/VE/Vector/vec_load.ll b/llvm/test/CodeGen/VE/Vector/vec_load.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_load.ll
@@ -0,0 +1,127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+declare <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* %0, i32 immarg %1, <128 x i1> %2, <128 x double> %3) #0
+
+; TODO: Custom widen by lowering to vvp_load in ReplaceNodeResult
+; Function Attrs: nounwind
+; define fastcc <128 x double> @vec_mload_v128f64(<128 x double>* %P, <128 x i1> %M) {
+;   %r = call <128 x double> @llvm.masked.load.v128f64.p0v128f64(<128 x double>* %P, i32 16, <128 x i1> %M, <128 x double> undef)
+;   ret <128 x double> %r
+; }
+
+
+declare <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %0, i32 immarg %1, <256 x i1> %2, <256 x double> %3) #0
+
+; Function Attrs: nounwind
+define fastcc <256 x double> @vec_mload_v256f64(<256 x double>* %P, <256 x i1> %M) {
+; CHECK-LABEL: vec_mload_v256f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vseq %v0
+; CHECK-NEXT:    vmulu.l %v0, 8, %v0, %vm1
+; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    vgt %v0, %v0, 0, 0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = call <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %P, i32 16, <256 x i1> %M, <256 x double> undef)
+  ret <256 x double> %r
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x double> @vec_load_v256f64(<256 x double>* %P) {
+; CHECK-LABEL: vec_load_v256f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vld %v0, 8, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = load <256 x double>, <256 x double>* %P, align 4
+  ret <256 x double> %r
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x double> @vec_mload_pt_v256f64(<256 x double>* %P, <256 x double> %PT, <256 x i1> %M) {
+; CHECK-LABEL: vec_mload_pt_v256f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vseq %v1
+; CHECK-NEXT:    vmulu.l %v1, 8, %v1, %vm1
+; CHECK-NEXT:    vaddu.l %v1, %s0, %v1, %vm1
+; CHECK-NEXT:    vgt %v1, %v1, 0, 0, %vm1
+; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = call <256 x double> @llvm.masked.load.v256f64.p0v256f64(<256 x double>* %P, i32 16, <256 x i1> %M, <256 x double> %PT)
+  ret <256 x double> %r
+}
+
+
+declare <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %0, i32 immarg %1, <256 x i1> %2, <256 x float> %3) #0
+
+; Function Attrs: nounwind
+define fastcc <256 x float> @vec_mload_v256f32(<256 x float>* %P, <256 x i1> %M) {
+; CHECK-LABEL: vec_mload_v256f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vseq %v0
+; CHECK-NEXT:    vmulu.l %v0, 4, %v0, %vm1
+; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    vgtu %v0, %v0, 0, 0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %P, i32 16, <256 x i1> %M, <256 x float> undef)
+  ret <256 x float> %r
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x float> @vec_mload_pt_v256f32(<256 x float>* %P, <256 x float> %PT, <256 x i1> %M) {
+; CHECK-LABEL: vec_mload_pt_v256f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vseq %v1
+; CHECK-NEXT:    vmulu.l %v1, 4, %v1, %vm1
+; CHECK-NEXT:    vaddu.l %v1, %s0, %v1, %vm1
+; CHECK-NEXT:    vgtu %v1, %v1, 0, 0, %vm1
+; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = call <256 x float> @llvm.masked.load.v256f32.p0v256f32(<256 x float>* %P, i32 16, <256 x i1> %M, <256 x float> %PT)
+  ret <256 x float> %r
+}
+
+
+declare <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %0, i32 immarg %1, <256 x i1> %2, <256 x i32> %3) #0
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @vec_mload_v256i32(<256 x i32>* %P, <256 x i1> %M) {
+; CHECK-LABEL: vec_mload_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vseq %v0
+; CHECK-NEXT:    vmulu.l %v0, 4, %v0, %vm1
+; CHECK-NEXT:    vaddu.l %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    vgtl.zx %v0, %v0, 0, 0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = call <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %P, i32 16, <256 x i1> %M, <256 x i32> undef)
+  ret <256 x i32> %r
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @vec_mload_pt_v256i32(<256 x i32>* %P, <256 x i32> %PT, <256 x i1> %M) {
+; CHECK-LABEL: vec_mload_pt_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vseq %v1
+; CHECK-NEXT:    vmulu.l %v1, 4, %v1, %vm1
+; CHECK-NEXT:    vaddu.l %v1, %s0, %v1, %vm1
+; CHECK-NEXT:    vgtl.zx %v1, %v1, 0, 0, %vm1
+; CHECK-NEXT:    vmrg %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r = call <256 x i32> @llvm.masked.load.v256i32.p0v256i32(<256 x i32>* %P, i32 16, <256 x i1> %M, <256 x i32> %PT)
+  ret <256 x i32> %r
+}
+
+attributes #0 = { argmemonly nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/VE/Vector/vec_store.ll b/llvm/test/CodeGen/VE/Vector/vec_store.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_store.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+declare void @llvm.masked.store.v256f64.p0v256f64(<256 x double>, <256 x double>*, i32 immarg, <256 x i1>)
+
+define fastcc void @vec_mstore_v256f64(<256 x double>* %P, <256 x double> %V, <256 x i1> %M) {
+; CHECK-LABEL: vec_mstore_v256f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vst %v0, 8, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  call void @llvm.masked.store.v256f64.p0v256f64(<256 x double> %V, <256 x double>* %P, i32 16, <256 x i1> %M)
+  ret void
+}
+
+
+declare void @llvm.masked.store.v256f32.p0v256f32(<256 x float>, <256 x float>*, i32 immarg, <256 x i1>)
+
+define fastcc void @vec_mstore_v256f32(<256 x float>* %P, <256 x float> %V, <256 x i1> %M) {
+; CHECK-LABEL: vec_mstore_v256f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vstu %v0, 4, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  call void @llvm.masked.store.v256f32.p0v256f32(<256 x float> %V, <256 x float>* %P, i32 16, <256 x i1> %M)
+  ret void
+}
+
+
+declare void @llvm.masked.store.v256i32.p0v256i32(<256 x i32>, <256 x i32>*, i32 immarg, <256 x i1>)
+
+define fastcc void @vec_mstore_v256i32(<256 x i32>* %P, <256 x i32> %V, <256 x i1> %M) {
+; CHECK-LABEL: vec_mstore_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vstl %v0, 4, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  call void @llvm.masked.store.v256i32.p0v256i32(<256 x i32> %V, <256 x i32>* %P, i32 16, <256 x i1> %M)
+  ret void
+}