diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -16597,6 +16597,58 @@ } #endif +/* vec_xst_trunc: store __vec[0], truncated to the pointee type, at __ptr[__offset]; __offset counts pointee-sized elements, not bytes. */ + +#if defined(__POWER10_VECTOR__) && defined(__VSX__) +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed char *__ptr) { + *(__ptr + __offset) = (signed char)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned char *__ptr) { + *(__ptr + __offset) = (unsigned char)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed short *__ptr) { + *(__ptr + __offset) = (signed short)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned short *__ptr) { + *(__ptr + __offset) = (unsigned short)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed int *__ptr) { + *(__ptr + __offset) = (signed int)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned int *__ptr) { + *(__ptr + __offset) = (unsigned int)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed long long *__ptr) { + *(__ptr + __offset) = (signed long long)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned long long *__ptr) { + *(__ptr + __offset) = (unsigned long long)__vec[0]; +} +#endif + /* vec_xst_be */ #ifdef __LITTLE_ENDIAN__ diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ 
b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -10,7 +10,18 @@ vector unsigned short vusa; vector unsigned int vuia; vector unsigned long long vulla, vullb; + +signed char sca; +unsigned char uca; +signed short ssa; +unsigned short usa; +signed int sia; unsigned int uia; +signed long long slla; +unsigned long long ulla; + +vector unsigned __int128 vui128a; +vector signed __int128 vsi128a; vector unsigned long long test_vpdepd(void) { // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64> @@ -91,3 +102,59 @@ // CHECK-NEXT: ret i32 return vec_test_lsbb_all_zeros(vuca); } + +void test_vec_xst_trunc_sc(vector signed __int128 __a, signed long long __b, + signed char *__c) { + // CHECK-BE: store i8 %{{.+}}, i8* %{{.+}}, align 1 + // CHECK-LE: store i8 %{{.+}}, i8* %{{.+}}, align 1 + vec_xst_trunc(__a, __b, __c); +} + +void test_vec_xst_trunc_uc(vector unsigned __int128 __a, signed long long __b, + unsigned char *__c) { + // CHECK-BE: store i8 %{{.+}}, i8* %{{.+}}, align 1 + // CHECK-LE: store i8 %{{.+}}, i8* %{{.+}}, align 1 + vec_xst_trunc(__a, __b, __c); +} + +void test_vec_xst_trunc_ss(vector signed __int128 __a, signed long long __b, + signed short *__c) { + // CHECK-BE: store i16 %{{.+}}, i16* %{{.+}}, align 2 + // CHECK-LE: store i16 %{{.+}}, i16* %{{.+}}, align 2 + vec_xst_trunc(__a, __b, __c); +} + +void test_vec_xst_trunc_us(vector unsigned __int128 __a, signed long long __b, + unsigned short *__c) { + // CHECK-BE: store i16 %{{.+}}, i16* %{{.+}}, align 2 + // CHECK-LE: store i16 %{{.+}}, i16* %{{.+}}, align 2 + vec_xst_trunc(__a, __b, __c); +} + +void test_vec_xst_trunc_si(vector signed __int128 __a, signed long long __b, + signed int *__c) { + // CHECK-BE: store i32 %{{.+}}, i32* %{{.+}}, align 4 + // CHECK-LE: store i32 %{{.+}}, i32* %{{.+}}, align 4 + vec_xst_trunc(__a, __b, __c); +} + +void test_vec_xst_trunc_ui(vector unsigned __int128 __a, signed long long __b, + unsigned int *__c) { + // CHECK-BE: store i32 %{{.+}}, i32* %{{.+}}, align 4 + // CHECK-LE: store i32 
%{{.+}}, i32* %{{.+}}, align 4 + vec_xst_trunc(__a, __b, __c); +} + +void test_vec_xst_trunc_sll(vector signed __int128 __a, signed long long __b, + signed long long *__c) { + // CHECK-BE: store i64 %{{.+}}, i64* %{{.+}}, align 8 + // CHECK-LE: store i64 %{{.+}}, i64* %{{.+}}, align 8 + vec_xst_trunc(__a, __b, __c); +} + +void test_vec_xst_trunc_ull(vector unsigned __int128 __a, signed long long __b, + unsigned long long *__c) { + // CHECK-BE: store i64 %{{.+}}, i64* %{{.+}}, align 8 + // CHECK-LE: store i64 %{{.+}}, i64* %{{.+}}, align 8 + vec_xst_trunc(__a, __b, __c); +} diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -228,6 +228,14 @@ def PrefixInstrs : Predicate<"PPCSubTarget->hasPrefixInstrs()">; def IsISA3_1 : Predicate<"PPCSubTarget->isISA3_1()">; +let mayLoad = 0, mayStore = 1, Predicates = [IsISA3_1] in { + // The XFormMemOp flag is set on the instruction format. 
+ def STXVRBX : X_XS6_RA5_RB5<31, 141, "stxvrbx", vsrc, []>; + def STXVRHX : X_XS6_RA5_RB5<31, 173, "stxvrhx", vsrc, []>; + def STXVRWX : X_XS6_RA5_RB5<31, 205, "stxvrwx", vsrc, []>; + def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>; +} + let Predicates = [PrefixInstrs] in { let Interpretation64Bit = 1, isCodeGenOnly = 1 in { defm PADDI8 : @@ -590,3 +598,14 @@ def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)), (EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>; } + +let AddedComplexity = 400, Predicates = [IsISA3_1] in { /* NOTE(review): these patterns select stxvr[bhwd]x for a store of vector element 0 and carry no endianness predicate; the instructions presumably store the rightmost (least-significant) element, which is element 0 only on little-endian -- confirm big-endian lowering against the ISA. */ + def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src), + (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; + def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$rS, 0)), xoaddr:$src), + (STXVRHX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; + def : Pat<(store (i32 (vector_extract v4i32:$rS, 0)), xoaddr:$src), + (STXVRWX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; + def : Pat<(store (i64 (vector_extract v2i64:$rS, 0)), xoaddr:$src), + (STXVRDX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>; +} diff --git a/llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll b/llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll --- a/llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll +++ b/llvm/test/CodeGen/PowerPC/p10-vsx-builtins.ll @@ -33,3 +33,113 @@ %0 = tail call i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8> %vuca, i1 0) ret i32 %0 } + +define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_sc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvrbx v2, r6, r5 +; CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <16 x i8> + %conv = extractelement <16 x i8> %0, i32 0 + %add.ptr = getelementptr inbounds i8, i8* %__ptr, i64 %__offset + store i8 %conv, i8* %add.ptr, align 1 + ret void +} + +define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_uc: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stxvrbx v2, r6, r5 +; 
CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <16 x i8> + %conv = extractelement <16 x i8> %0, i32 0 + %add.ptr = getelementptr inbounds i8, i8* %__ptr, i64 %__offset + store i8 %conv, i8* %add.ptr, align 1 + ret void +} + +define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_ss: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r3, r5, 1 +; CHECK-NEXT: stxvrhx v2, r6, r3 +; CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <8 x i16> + %conv = extractelement <8 x i16> %0, i32 0 + %add.ptr = getelementptr inbounds i16, i16* %__ptr, i64 %__offset + store i16 %conv, i16* %add.ptr, align 2 + ret void +} + +define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_us: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r3, r5, 1 +; CHECK-NEXT: stxvrhx v2, r6, r3 +; CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <8 x i16> + %conv = extractelement <8 x i16> %0, i32 0 + %add.ptr = getelementptr inbounds i16, i16* %__ptr, i64 %__offset + store i16 %conv, i16* %add.ptr, align 2 + ret void +} + +define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_si: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r3, r5, 2 +; CHECK-NEXT: stxvrwx v2, r6, r3 +; CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <4 x i32> + %conv = extractelement <4 x i32> %0, i32 0 + %add.ptr = getelementptr inbounds i32, i32* %__ptr, i64 %__offset + store i32 %conv, i32* %add.ptr, align 4 + ret void +} + +define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_ui: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r3, r5, 2 +; CHECK-NEXT: stxvrwx v2, r6, r3 +; CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <4 x i32> + %conv = extractelement <4 x i32> %0, i32 0 + %add.ptr = getelementptr inbounds i32, i32* 
%__ptr, i64 %__offset + store i32 %conv, i32* %add.ptr, align 4 + ret void +} + +define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_sll: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r3, r5, 3 +; CHECK-NEXT: stxvrdx v2, r6, r3 +; CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <2 x i64> + %conv = extractelement <2 x i64> %0, i32 0 + %add.ptr = getelementptr inbounds i64, i64* %__ptr, i64 %__offset + store i64 %conv, i64* %add.ptr, align 8 + ret void +} + +define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) { +; CHECK-LABEL: vec_xst_trunc_ull: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r3, r5, 3 +; CHECK-NEXT: stxvrdx v2, r6, r3 +; CHECK-NEXT: blr +entry: + %0 = bitcast <1 x i128> %__vec to <2 x i64> + %conv = extractelement <2 x i64> %0, i32 0 + %add.ptr = getelementptr inbounds i64, i64* %__ptr, i64 %__offset + store i64 %conv, i64* %add.ptr, align 8 + ret void +} diff --git a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt --- a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt +++ b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt @@ -31,5 +31,17 @@ # CHECK: vclrrb 1, 4, 3 0x10 0x24 0x19 0xcd +# CHECK: stxvrbx 32, 3, 1 +0x7c 0x03 0x09 0x1b + +# CHECK: stxvrhx 33, 3, 1 +0x7c 0x23 0x09 0x5b + +# CHECK: stxvrwx 34, 3, 1 +0x7c 0x43 0x09 0x9b + +# CHECK: stxvrdx 35, 3, 1 +0x7c 0x63 0x09 0xdb + # CHECK: xvtlsbb 1, 7 0xf0 0x82 0x3f 0x6c diff --git a/llvm/test/MC/PowerPC/p10.s b/llvm/test/MC/PowerPC/p10.s --- a/llvm/test/MC/PowerPC/p10.s +++ b/llvm/test/MC/PowerPC/p10.s @@ -33,6 +33,18 @@ # CHECK-BE: vclrrb 1, 4, 3 # encoding: [0x10,0x24,0x19,0xcd] # CHECK-LE: vclrrb 1, 4, 3 # encoding: [0xcd,0x19,0x24,0x10] vclrrb 1, 4, 3 +# CHECK-BE: stxvrbx 32, 3, 1 # encoding: [0x7c,0x03,0x09,0x1b] +# CHECK-LE: stxvrbx 32, 3, 1 # encoding: [0x1b,0x09,0x03,0x7c] + stxvrbx 32, 3, 1 +# CHECK-BE: stxvrhx 33, 3, 1 # 
encoding: [0x7c,0x23,0x09,0x5b] +# CHECK-LE: stxvrhx 33, 3, 1 # encoding: [0x5b,0x09,0x23,0x7c] + stxvrhx 33, 3, 1 +# CHECK-BE: stxvrwx 34, 3, 1 # encoding: [0x7c,0x43,0x09,0x9b] +# CHECK-LE: stxvrwx 34, 3, 1 # encoding: [0x9b,0x09,0x43,0x7c] + stxvrwx 34, 3, 1 +# CHECK-BE: stxvrdx 35, 3, 1 # encoding: [0x7c,0x63,0x09,0xdb] +# CHECK-LE: stxvrdx 35, 3, 1 # encoding: [0xdb,0x09,0x63,0x7c] + stxvrdx 35, 3, 1 # CHECK-BE: xvtlsbb 1, 7 # encoding: [0xf0,0x82,0x3f,0x6c] # CHECK-LE: xvtlsbb 1, 7 # encoding: [0x6c,0x3f,0x82,0xf0] xvtlsbb 1, 7