diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16509,6 +16509,54 @@
 #define vec_xl_be vec_xl
 #endif
 
+#if defined(__POWER10_VECTOR__) && defined(__VSX__)
+
+/* vec_xl_sext */
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed char *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed short *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed int *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed long long *__pointer) {
+  return (vector signed __int128)*(__pointer + __offset);
+}
+
+/* vec_xl_zext */
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned char *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned short *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned int *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned long long *__pointer) {
+  return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+#endif
+
 /* vec_xst */
 
 static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -20,10 +20,14 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
-unsigned int uia, uib;
-unsigned char uca;
-unsigned short usa;
-unsigned long long ulla;
+signed int *iap;
+unsigned int uia, uib, *uiap;
+signed char *cap;
+unsigned char uca, *ucap;
+signed short *sap;
+unsigned short usa, *usap;
+signed long long *llap, llb;
+unsigned long long ulla, *ullap;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -634,3 +638,59 @@
   // CHECK-NEXT: ret i32
   return vec_test_lsbb_all_zeros(vuca);
 }
+
+vector signed __int128 test_vec_xl_sext_i8(void) {
+  // CHECK: load i8
+  // CHECK: sext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, cap);
+}
+
+vector signed __int128 test_vec_xl_sext_i16(void) {
+  // CHECK: load i16
+  // CHECK: sext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, sap);
+}
+
+vector signed __int128 test_vec_xl_sext_i32(void) {
+  // CHECK: load i32
+  // CHECK: sext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, iap);
+}
+
+vector signed __int128 test_vec_xl_sext_i64(void) {
+  // CHECK: load i64
+  // CHECK: sext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_sext(llb, llap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i8(void) {
+  // CHECK: load i8
+  // CHECK: zext i8
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, ucap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i16(void) {
+  // CHECK: load i16
+  // CHECK: zext i16
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, usap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i32(void) {
+  // CHECK: load i32
+  // CHECK: zext i32
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, uiap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i64(void) {
+  // CHECK: load i64
+  // CHECK: zext i64
+  // CHECK: ret <1 x i128>
+  return vec_xl_zext(llb, ullap);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -477,6 +477,12 @@
     /// an xxswapd.
     LXVD2X,
 
+    /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+    /// This node represents a v1i128 BUILD_VECTOR of a zero extending load
+    /// from a narrower type (i8, i16, i32, or i64) to i128.
+    /// Allows utilization of the Load VSX Vector Rightmost Instructions.
+    LXVRZX,
+
     /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
     /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
     /// the vector type to load vector in big-endian element order.
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1434,6 +1434,7 @@
   case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
   case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
   case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
+  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
   }
   return nullptr;
 }
@@ -13383,6 +13384,45 @@
   return SDValue();
 }
 
+// Look for the pattern of a load from a narrow width to i128, feeding
+// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
+// (LXVRZX). This node represents a zero extending load that will be matched
+// to the Load VSX Vector Rightmost instructions.
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+  SDLoc dl(N);
+
+  // This combine is only eligible for a BUILD_VECTOR of v1i128.
+  // Other return types are not valid for the LXVRZX replacement.
+  if (N->getValueType(0) != MVT::v1i128)
+    return SDValue();
+
+  SDValue Operand = N->getOperand(0);
+  // Proceed with the transformation if the operand to the BUILD_VECTOR
+  // is a load instruction.
+  if (Operand.getOpcode() != ISD::LOAD)
+    return SDValue();
+
+  LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
+  EVT MemoryType = LD->getMemoryVT();
+
+  // This transformation is only valid if we are loading either a byte,
+  // halfword, word, or doubleword.
+  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
+                     MemoryType == MVT::i32 || MemoryType == MVT::i64;
+
+  // Ensure that the load from the narrow width is being zero extended to i128.
+  if (!ValidLDType || (LD->getValueType(0) != MVT::i128) ||
+      (LD->getExtensionType() != ISD::ZEXTLOAD && LD->getExtensionType() != ISD::EXTLOAD))
+    return SDValue();
+
+  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr(),
+                       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), dl)};
+
+  return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, dl,
+                                 DAG.getVTList(MVT::v1i128, MVT::Other),
+                                 LoadOps, MemoryType, LD->getMemOperand());
+}
+
 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -13420,6 +13460,14 @@
     return Reduced;
   }
 
+  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
+  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
+  // is a load from a narrower type that is zero extended to i128.
+  if (Subtarget.isISA3_1()) {
+    SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
+    if (BVOfZLoad)
+      return BVOfZLoad;
+  }
 
   if (N->getValueType(0) != MVT::v2f64)
     return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
 // address computations).
 class isPCRel { bit PCRel = 1; }
 
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+  SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+                       [SDNPHasChain, SDNPMayLoad]>;
+
 // Top-level class for prefixed instructions.
 class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
          InstrItinClass itin> : Instruction {
@@ -1134,6 +1143,15 @@
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
   def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
             (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+  def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
+             (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
 }
 
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
--- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefix=CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s
+; RUN:   FileCheck %s --check-prefix=CHECK-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
 ; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN:   FileCheck %s --check-prefix=CHECK-O0
+; RUN:   FileCheck %s --check-prefix=CHECK-O0
 
 ; These test cases aims to test the builtins for the Power10 VSX vector
 ; instructions introduced in ISA 3.1.
@@ -15,13 +15,21 @@
 declare i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8>, i32)
 
 define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_ones:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvtlsbb cr0, v2
-; CHECK-NEXT: mfocrf r3, 128
-; CHECK-NEXT: srwi r3, r3, 31
-; CHECK-NEXT: extsw r3, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: xvtlsbb cr0, v2
+; CHECK-LE-NEXT: mfocrf r3, 128
+; CHECK-LE-NEXT: srwi r3, r3, 31
+; CHECK-LE-NEXT: extsw r3, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xvtlsbb cr0, v2
+; CHECK-BE-NEXT: mfocrf r3, 128
+; CHECK-BE-NEXT: srwi r3, r3, 31
+; CHECK-BE-NEXT: extsw r3, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
 ; CHECK-O0: # %bb.0: # %entry
@@ -36,13 +44,21 @@
 }
 
 define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_zeros:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvtlsbb cr0, v2
-; CHECK-NEXT: mfocrf r3, 128
-; CHECK-NEXT: rlwinm r3, r3, 3, 31, 31
-; CHECK-NEXT: extsw r3, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: xvtlsbb cr0, v2
+; CHECK-LE-NEXT: mfocrf r3, 128
+; CHECK-LE-NEXT: rlwinm r3, r3, 3, 31, 31
+; CHECK-LE-NEXT: extsw r3, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xvtlsbb cr0, v2
+; CHECK-BE-NEXT: mfocrf r3, 128
+; CHECK-BE-NEXT: rlwinm r3, r3, 3, 31, 31
+; CHECK-BE-NEXT: extsw r3, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
 ; CHECK-O0: # %bb.0: # %entry
@@ -57,10 +73,15 @@
 }
 
 define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sc:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stxvrbx v2, r6, r5
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_sc:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: stxvrbx v2, r6, r5
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: stxvrbx v2, r6, r5
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_sc:
 ; CHECK-O0: # %bb.0: # %entry
@@ -79,10 +100,15 @@
 }
 
 define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_uc:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stxvrbx v2, r6, r5
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_uc:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: stxvrbx v2, r6, r5
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_uc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: stxvrbx v2, r6, r5
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_uc:
 ; CHECK-O0: # %bb.0: # %entry
@@ -101,11 +127,17 @@
 }
 
 define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ss:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 1
-; CHECK-NEXT: stxvrhx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_ss:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 1
+; CHECK-LE-NEXT: stxvrhx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ss:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 1
+; CHECK-BE-NEXT: stxvrhx v2, r6, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ss:
 ; CHECK-O0: # %bb.0: # %entry
@@ -125,11 +157,17 @@
 }
 
 define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_us:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 1
-; CHECK-NEXT: stxvrhx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_us:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 1
+; CHECK-LE-NEXT: stxvrhx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_us:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 1
+; CHECK-BE-NEXT: stxvrhx v2, r6, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_us:
 ; CHECK-O0: # %bb.0: # %entry
@@ -149,11 +187,17 @@
 }
 
 define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_si:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 2
-; CHECK-NEXT: stxvrwx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_si:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 2
+; CHECK-LE-NEXT: stxvrwx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_si:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 2
+; CHECK-BE-NEXT: stxvrwx v2, r6, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_si:
 ; CHECK-O0: # %bb.0: # %entry
@@ -173,11 +217,17 @@
 }
 
 define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ui:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 2
-; CHECK-NEXT: stxvrwx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_ui:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 2
+; CHECK-LE-NEXT: stxvrwx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ui:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 2
+; CHECK-BE-NEXT: stxvrwx v2, r6, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ui:
 ; CHECK-O0: # %bb.0: # %entry
@@ -197,11 +247,17 @@
 }
 
 define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sll:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 3
-; CHECK-NEXT: stxvrdx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_sll:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 3
+; CHECK-LE-NEXT: stxvrdx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sll:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 3
+; CHECK-BE-NEXT: stxvrdx v2, r6, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_sll:
 ; CHECK-O0: # %bb.0: # %entry
@@ -219,11 +275,17 @@
 }
 
 define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ull:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 3
-; CHECK-NEXT: stxvrdx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_ull:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 3
+; CHECK-LE-NEXT: stxvrdx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ull:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 3
+; CHECK-BE-NEXT: stxvrdx v2, r6, r3
+; CHECK-BE-NEXT: blr
 ;
 ; CHECK-O0-LABEL: vec_xst_trunc_ull:
 ; CHECK-O0: # %bb.0: # %entry
@@ -239,3 +301,236 @@
   store i64 %conv, i64* %add.ptr, align 8
   ret void
 }
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lxvrbx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvrbx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: lxvrbx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = zext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext_short:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 1
+; CHECK-LE-NEXT: lxvrhx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext_short:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 1
+; CHECK-BE-NEXT: lxvrhx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_short:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 1
+; CHECK-O0-NEXT: lxvrhx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = zext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext_word:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 2
+; CHECK-LE-NEXT: lxvrwx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext_word:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 2
+; CHECK-BE-NEXT: lxvrwx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_word:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 2
+; CHECK-O0-NEXT: lxvrwx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = zext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext_dw:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 3
+; CHECK-LE-NEXT: lxvrdx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext_dw:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 3
+; CHECK-BE-NEXT: lxvrdx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_dw:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 3
+; CHECK-O0-NEXT: lxvrdx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = zext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_b:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lbzx r3, r4, r3
+; CHECK-LE-NEXT: extsb r3, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_b:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lbzx r3, r4, r3
+; CHECK-BE-NEXT: extsb r3, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_b:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: lbzx r3, r4, r3
+; CHECK-O0-NEXT: extsb r3, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset
+  %0 = load i8, i8* %add.ptr, align 1
+  %conv = sext i8 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_h:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 1
+; CHECK-LE-NEXT: lhax r3, r4, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_h:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 1
+; CHECK-BE-NEXT: lhax r3, r4, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_h:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 1
+; CHECK-O0-NEXT: lhax r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i16, i16* %p, i64 %offset
+  %0 = load i16, i16* %add.ptr, align 2
+  %conv = sext i16 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_w:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 2
+; CHECK-LE-NEXT: lwax r3, r4, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_w:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 2
+; CHECK-BE-NEXT: lwax r3, r4, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_w:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 2
+; CHECK-O0-NEXT: lwax r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i32, i32* %p, i64 %offset
+  %0 = load i32, i32* %add.ptr, align 4
+  %conv = sext i32 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_d:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 3
+; CHECK-LE-NEXT: ldx r3, r4, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_d:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 3
+; CHECK-BE-NEXT: ldx r3, r4, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_d:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 3
+; CHECK-O0-NEXT: ldx r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+  %add.ptr = getelementptr inbounds i64, i64* %p, i64 %offset
+  %0 = load i64, i64* %add.ptr, align 8
+  %conv = sext i64 %0 to i128
+  %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+  ret <1 x i128> %splat.splatinsert
+}
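
For context, below is a minimal usage sketch of the two builtin families this patch adds. It is not part of the patch itself: the file layout and the suggested build line (clang -mcpu=pwr10 -O2) are illustrative assumptions, while the overload signatures and the expected lxvr*x lowering for the zero-extending loads are taken from the altivec.h, PPCISelLowering, and test changes above.

/* usage_sketch.c -- illustrative only, not part of this patch.
 * Assumed build line: clang -mcpu=pwr10 -O2 -c usage_sketch.c
 * With the altivec.h additions above, vec_xl_sext/vec_xl_zext load a single
 * scalar element at pointer + offset and widen it into a one-element
 * 128-bit vector; the intent of the ISel changes is that the zero-extending
 * forms lower to lxvrbx/lxvrhx/lxvrwx/lxvrdx on Power10. */
#include <altivec.h>

vector signed __int128 widen_sext_word(signed long long off, signed int *p) {
  /* Loads the 32-bit element at p + off and sign extends it to 128 bits. */
  return vec_xl_sext(off, p);
}

vector unsigned __int128 widen_zext_dword(signed long long off,
                                          unsigned long long *p) {
  /* Loads the 64-bit element at p + off and zero extends it to 128 bits. */
  return vec_xl_zext(off, p);
}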