Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16509,6 +16509,54 @@
 #define vec_xl_be vec_xl
 #endif
 
+#if defined(__POWER10_VECTOR__) && defined(__VSX__)
+
+/* vec_xl_sext */
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed char *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed short *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed int *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed long long *__pointer) {
+  return (unaligned_vec_si128)*(__pointer + __offset);
+}
+
+/* vec_xl_zext */
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned char *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned short *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned int *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned long long *__pointer) {
+  return (unaligned_vec_ui128)*(__pointer + __offset);
+}
+
+#endif
+
 /* vec_xst */
 
 static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -24,10 +24,14 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
-unsigned int uia, uib;
-unsigned char uca;
-unsigned short usa;
-unsigned long long ulla;
+signed int *iap;
+unsigned int uia, uib, *uiap;
+signed char *cap;
+unsigned char uca, *ucap;
+signed short *sap;
+unsigned short usa, *usap;
+signed long long *llap, llb;
+unsigned long long ulla, *ullap;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -581,3 +585,59 @@
   // CHECK: ret <4 x float>
   return vec_splati_ins(vfa, 0, 1.0f);
 }
+
+vector signed __int128 test_vec_xl_sext_i8(void) {
+  // CHECK: load i8
+  // CHECK: sext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, cap);
+}
+
+vector signed __int128 test_vec_xl_sext_i16(void) {
+  // CHECK: load i16
+  // CHECK: sext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, sap);
+}
+
+vector signed __int128 test_vec_xl_sext_i32(void) {
+  // CHECK: load i32
+  // CHECK: sext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, iap);
+}
+
+vector signed __int128 test_vec_xl_sext_i64(void) {
+  // CHECK: load i64
+  // CHECK: sext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, llap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i8(void) {
+  // CHECK: load i8
+  // CHECK: zext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, ucap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i16(void) {
+  // CHECK: load i16
+  // CHECK: zext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, usap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i32(void) {
+  // CHECK: load i32
+  // CHECK: zext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, uiap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i64(void) {
+  // CHECK: load i64
+  // CHECK: zext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, ullap);
+}
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -493,6 +493,12 @@
     /// an xxswapd.
     LXVD2X,
 
+    /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+    /// This node represents a v1i128 BUILD_VECTOR of a zero extending load
+    /// from a narrower integer type to i128. It allows utilization of the
+    /// Load VSX Vector Rightmost instructions.
+    LXVRZX,
+
     /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
     /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
     /// the vector type to load vector in big-endian element order.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1572,6 +1572,7 @@
   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
+  case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
   }
   return nullptr;
 }
@@ -14125,6 +14126,44 @@
   return SDValue();
 }
 
+// Look for the pattern of a load from a narrow width to i128, feeding
+// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
+// (LXVRZX). This node represents a zero extending load that will be matched
+// to the Load VSX Vector Rightmost instructions.
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+
+  // This combine is only eligible for a BUILD_VECTOR of v1i128.
+  // Other return types are not valid for the LXVRZX replacement.
+  if (N->getValueType(0) != MVT::v1i128)
+    return SDValue();
+
+  SDValue Operand = N->getOperand(0);
+  // Proceed with the transformation if the operand to the BUILD_VECTOR
+  // is a load instruction.
+  if (Operand.getOpcode() != ISD::LOAD)
+    return SDValue();
+
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
+  EVT MemoryType = LD->getMemoryVT();
+
+  // This transformation is only valid if we are loading either a byte,
+  // halfword, word, or doubleword.
+  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
+                     MemoryType == MVT::i32 || MemoryType == MVT::i64;
+
+  // Ensure that the load from the narrow width is being zero extended to i128.
+  if (!ValidLDType || (LD->getValueType(0) != MVT::i128) ||
+      (LD->getExtensionType() != ISD::ZEXTLOAD))
+    return SDValue();
+
+  SDValue LoadOps[] = {
+      LD->getChain(), LD->getBasePtr(),
+      DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), dl)};
+
+  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, dl,
+                                 DAG.getVTList(MVT::v1i128, MVT::Other),
+                                 LoadOps, MemoryType, LD->getMemOperand());
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -14162,6 +14201,14 @@
     return Reduced;
   }
 
+  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
+  // if this is a BUILD_VECTOR of v1i128 whose operand is a zero extending
+  // load from a narrower integer type to i128.
+  if (Subtarget.isISA3_1()) {
+    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
+    if (BVOfZLoad)
+      return BVOfZLoad;
+  }
 
   if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC specific DAG nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+                       [SDNPHasChain, SDNPMayLoad]>;
+
 // Top-level class for prefixed instructions.
 class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
          InstrItinClass itin> : Instruction {
Index: llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -302,3 +302,15 @@
 
 # CHECK: stxvrdx 35, 3, 1
 0x7c 0x63 0x09 0xdb
+
+# CHECK: lxvrbx 32, 1, 2
+0x7c 0x01 0x10 0x1b
+
+# CHECK: lxvrhx 33, 1, 2
+0x7c 0x21 0x10 0x5b
+
+# CHECK: lxvrdx 34, 1, 2
+0x7c 0x41 0x10 0xdb
+
+# CHECK: lxvrwx 35, 1, 2
+0x7c 0x61 0x10 0x9b
Index: llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
===================================================================
--- llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -429,3 +429,15 @@
 # CHECK-BE: stxvrdx 35, 3, 1              # encoding: [0x7c,0x63,0x09,0xdb]
 # CHECK-LE: stxvrdx 35, 3, 1              # encoding: [0xdb,0x09,0x63,0x7c]
             stxvrdx 35, 3, 1
+# CHECK-BE: lxvrbx 32, 1, 2               # encoding: [0x7c,0x01,0x10,0x1b]
+# CHECK-LE: lxvrbx 32, 1, 2               # encoding: [0x1b,0x10,0x01,0x7c]
+            lxvrbx 32, 1, 2
+# CHECK-BE: lxvrhx 33, 1, 2               # encoding: [0x7c,0x21,0x10,0x5b]
+# CHECK-LE: lxvrhx 33, 1, 2               # encoding: [0x5b,0x10,0x21,0x7c]
+            lxvrhx 33, 1, 2
+# CHECK-BE: lxvrdx 34, 1, 2               # encoding: [0x7c,0x41,0x10,0xdb]
+# CHECK-LE: lxvrdx 34, 1, 2               # encoding: [0xdb,0x10,0x41,0x7c]
+            lxvrdx 34, 1, 2
+# CHECK-BE: lxvrwx 35, 1, 2               # encoding: [0x7c,0x61,0x10,0x9b]
+# CHECK-LE: lxvrwx 35, 1, 2               # encoding: [0x9b,0x10,0x61,0x7c]
+            lxvrwx 35, 1, 2
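
For context, a minimal usage sketch of the new builtins (not part of the patch). The file name, function names, and the clang invocation below are illustrative assumptions, and the instructions actually emitted depend on the selection patterns for the PPClxvrzx node rather than on this combine alone.

/* xl_ext_demo.c -- illustrative only; assumes a Power10-enabled clang,
   e.g. clang -mcpu=power10 -O2 -S xl_ext_demo.c                        */
#include <altivec.h>

/* Zero extend one unsigned word into a v1i128. The zero extending load
   feeding a v1i128 BUILD_VECTOR is the pattern combineBVZEXTLOAD turns
   into a PPCISD::LXVRZX node (intended to map to lxvrwx here).         */
vector unsigned __int128 load_zext_word(unsigned int *p,
                                        signed long long off) {
  return vec_xl_zext(off, p);
}

/* Sign extend one doubleword into a v1i128. Only the builtin is added by
   this patch; no dedicated DAG combine is introduced for the sext case. */
vector signed __int128 load_sext_dword(signed long long *p,
                                       signed long long off) {
  return vec_xl_sext(off, p);
}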