Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -303,6 +303,16 @@
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")

+// P9 Vector extend sign builtins.
+BUILTIN(__builtin_altivec_vextsb2w, "V4SiV16Sc", "")
+BUILTIN(__builtin_altivec_vextsb2d, "V2SLLiV16Sc", "")
+BUILTIN(__builtin_altivec_vextsh2w, "V4SiV8Ss", "")
+BUILTIN(__builtin_altivec_vextsh2d, "V2SLLiV8Ss", "")
+BUILTIN(__builtin_altivec_vextsw2d, "V2SLLiV4Si", "")
+
+// P10 Vector extend sign builtins.
+BUILTIN(__builtin_altivec_vextsd2q, "V1SLLLiV2SLLi", "")
+
 // P10 Vector Extract with Mask built-ins.
 BUILTIN(__builtin_altivec_vextractbm, "UiV16Uc", "")
 BUILTIN(__builtin_altivec_vextracthm, "UiV8Us", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -3007,6 +3007,42 @@

 #define vec_vctuxs __builtin_altivec_vctuxs

+/* vec_signext */
+
+#ifdef __POWER9_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_signexti(vector signed char __a) {
+  return __builtin_altivec_vextsb2w(__a);
+}
+
+static __inline__ vector signed int __ATTRS_o_ai
+vec_signexti(vector signed short __a) {
+  return __builtin_altivec_vextsh2w(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed char __a) {
+  return __builtin_altivec_vextsb2d(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed short __a) {
+  return __builtin_altivec_vextsh2d(__a);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_signextll(vector signed int __a) {
+  return __builtin_altivec_vextsw2d(__a);
+}
+#endif
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_signextq(vector signed long long __a) {
+  return __builtin_altivec_vextsd2q(__a);
+}
+#endif
+
 /* vec_signed */

 static __inline__ vector signed int __ATTRS_o_ai
@@ -17259,6 +17295,16 @@
   return __a % __b;
 }

+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_mod(vector signed __int128 __a, vector signed __int128 __b) {
+  return __a % __b;
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_mod(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return __a % __b;
+}
+
 /* vec_sldbi */

 #define vec_sldb(__a, __b, __c) __builtin_altivec_vsldbi(__a, __b, (__c & 0x7))
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -1157,3 +1157,21 @@
   // CHECK: ret <1 x i128>
   return vec_xl_zext(llb, ullap);
 }
+
+vector signed __int128 test_vec_signextq_s128(void) {
+  // CHECK: @llvm.ppc.altivec.vextsd2q(<2 x i64>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_signextq(vslla);
+}
+
+vector unsigned __int128 test_vec_mod_u128(void) {
+  // CHECK: urem <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_mod(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_mod_s128(void) {
+  // CHECK: srem <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_mod(vsi128a, vsi128b);
+}
Index: clang/test/CodeGen/builtins-ppc-p9vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p9vector.c
+++ clang/test/CodeGen/builtins-ppc-p9vector.c
@@ -1227,3 +1227,32 @@
   return vec_extract4b(vuca, -5);
 }

+vector signed int test_vec_signexti_si_sc(void) {
+  // CHECK: @llvm.ppc.altivec.vextsb2w(<16 x i8>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_signexti(vsca);
+}
+
+vector signed int test_vec_signexti_si_ss(void) {
+  // CHECK: @llvm.ppc.altivec.vextsh2w(<8 x i16>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_signexti(vssa);
+}
+
+vector signed long long test_vec_signextll_sll_sc(void) {
+  // CHECK: @llvm.ppc.altivec.vextsb2d(<16 x i8>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_signextll(vsca);
+}
+
+vector signed long long test_vec_signextll_sll_ss(void) {
+  // CHECK: @llvm.ppc.altivec.vextsh2d(<8 x i16>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_signextll(vssa);
+}
+
+vector signed long long test_vec_signextll_sll_si(void) {
+  // CHECK: @llvm.ppc.altivec.vextsw2d(<4 x i32>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_signextll(vsia);
+}
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -782,6 +782,20 @@
             Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty], [IntrNoMem]>;

+  // Vector Sign Extension Instructions
+  def int_ppc_altivec_vextsb2w : GCCBuiltin<"__builtin_altivec_vextsb2w">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsb2d : GCCBuiltin<"__builtin_altivec_vextsb2d">,
+            Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsh2w : GCCBuiltin<"__builtin_altivec_vextsh2w">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsh2d : GCCBuiltin<"__builtin_altivec_vextsh2d">,
+            Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsw2d : GCCBuiltin<"__builtin_altivec_vextsw2d">,
+            Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextsd2q : GCCBuiltin<"__builtin_altivec_vextsd2q">,
+            Intrinsic<[llvm_v1i128_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+
   // Other multiplies.
   def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
             Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -888,6 +888,8 @@
       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
+      setOperationAction(ISD::UREM, MVT::v1i128, Legal);
+      setOperationAction(ISD::SREM, MVT::v1i128, Legal);
       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
     }
Index: llvm/lib/Target/PowerPC/PPCInstrAltivec.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1449,11 +1449,16 @@
                            [(set v2i64:$vD, (cttz v2i64:$vB))]>;

 // Vector Extend Sign
-def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>;
-def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>;
-def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
-def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
-def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
+def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", [(set v4i32:$vD,
+               (int_ppc_altivec_vextsb2w v16i8:$vB))]>;
+def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", [(set v4i32:$vD,
+               (int_ppc_altivec_vextsh2w v8i16:$vB))]>;
+def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", [(set v2i64:$vD,
+               (int_ppc_altivec_vextsb2d v16i8:$vB))]>;
+def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", [(set v2i64:$vD,
+               (int_ppc_altivec_vextsh2d v8i16:$vB))]>;
+def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", [(set v2i64:$vD,
+               (int_ppc_altivec_vextsw2d v4i32:$vB))]>;
 let isCodeGenOnly = 1 in {
   def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
   def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1338,11 +1338,14 @@
   def VCMPGTSQ_rec : VCMPo <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
   def VCMPGTUQ_rec : VCMPo <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
   def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmodsq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmodsq $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>;
   def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
-                        "vmoduq $vD, $vA, $vB", IIC_VecGeneral, []>;
+                        "vmoduq $vD, $vA, $vB", IIC_VecGeneral,
+                        [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;
   def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
-                                    "vextsd2q $vD, $vB", IIC_VecGeneral, []>;
+                                    "vextsd2q $vD, $vB", IIC_VecGeneral,
+                                    [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;
   def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
                                "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
                                "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
Index: llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-modulo.ll
@@ -10,6 +10,28 @@
 ; The vector modulo instructions operate on signed and unsigned words
 ; and doublewords.

+; The vector modulo instructions operate on signed and unsigned words,
+; doublewords and 128-bit values.
+
+
+define <1 x i128> @test_vmodsq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmodsq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmodsq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = srem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vmoduq(<1 x i128> %x, <1 x i128> %y) nounwind readnone {
+; CHECK-LABEL: test_vmoduq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmoduq v2, v2, v3
+; CHECK-NEXT:    blr
+  %tmp = urem <1 x i128> %x, %y
+  ret <1 x i128> %tmp
+}
+
 define <2 x i64> @test_vmodud(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: test_vmodud:
 ; CHECK:       # %bb.0: # %entry
Index: llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-sign-extend.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test vector sign extend builtins.
+
+declare <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8>) nounwind readnone
+declare <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16>) nounwind readnone
+declare <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32>) nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64>) nounwind readnone
+
+define <4 x i32> @test_vextsb2w(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsb2w v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsb2w(<16 x i8> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsb2d(<16 x i8> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsb2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsb2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsb2d(<16 x i8> %x)
+  ret <2 x i64> %tmp
+}
+
+define <4 x i32> @test_vextsh2w(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsh2w v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <4 x i32> @llvm.ppc.altivec.vextsh2w(<8 x i16> %x)
+  ret <4 x i32> %tmp
+}
+
+define <2 x i64> @test_vextsh2d(<8 x i16> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsh2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsh2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsh2d(<8 x i16> %x)
+  ret <2 x i64> %tmp
+}
+
+define <2 x i64> @test_vextsw2d(<4 x i32> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsw2d:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsw2d v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <2 x i64> @llvm.ppc.altivec.vextsw2d(<4 x i32> %x)
+  ret <2 x i64> %tmp
+}
+
+define <1 x i128> @test_vextsd2q(<2 x i64> %x) nounwind readnone {
+; CHECK-LABEL: test_vextsd2q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vextsd2q v2, v2
+; CHECK-NEXT:    blr
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vextsd2q(<2 x i64> %x)
+  ret <1 x i128> %tmp
+}
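
A short usage sketch, separate from the patch itself: it shows how the vec_signexti, vec_signextll, vec_signextq, and 128-bit vec_mod overloads added to altivec.h above might be exercised from C. The file name, variable names, printed values, and the compile invocation (e.g. clang -mcpu=pwr10 demo.c) are illustrative assumptions; the Power10-only paths need a compiler with this patch applied and a Power10 target.

#include <altivec.h>
#include <stdio.h>

int main(void) {
  /* Power9 forms: sign-extend narrow elements to wider signed elements. */
  vector signed char vc = {-1, 2, -3, 4, -5, 6, -7, 8,
                           -9, 10, -11, 12, -13, 14, -15, 16};
  vector signed int vw = vec_signexti(vc);        /* lowers to vextsb2w */
  vector signed long long vd = vec_signextll(vc); /* lowers to vextsb2d */

  /* Power10: sign-extend a doubleword element to a quadword (vextsd2q). */
  vector signed long long vll = {-42, 7};
  vector signed __int128 vq = vec_signextq(vll);

  /* Power10: element-wise 128-bit modulo (vmodsq/vmoduq). */
  vector signed __int128 num = {1000};
  vector signed __int128 den = {37};
  vector signed __int128 rem = vec_mod(num, den); /* 1000 % 37 == 1 */

  printf("%d %lld %lld\n", vw[0], vd[0], (long long)rem[0]);
  (void)vq;
  return 0;
}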