Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -390,6 +390,10 @@
 BUILTIN(__builtin_altivec_vextddvlx, "V2ULLiV2ULLiV2ULLiUi", "")
 BUILTIN(__builtin_altivec_vextddvrx, "V2ULLiV2ULLiV2ULLiUi", "")
 
+// P10 Vector rotate built-ins.
+BUILTIN(__builtin_altivec_vrlqmi, "V1ULLLiV1ULLLiV1ULLLiV1ULLLi", "")
+BUILTIN(__builtin_altivec_vrlqnm, "V1ULLLiV1ULLLiV1ULLLi", "")
+
 // VSX built-ins.
 
 BUILTIN(__builtin_vsx_lxvd2x, "V2divC*", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -7789,6 +7789,18 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) {
+  return (vector signed __int128)(((vector unsigned __int128)__a << __b) | ((vector unsigned __int128)__a >> ((__CHAR_BIT__ * sizeof(vector signed __int128)) - __b)));
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) {
+  return (__a << __b) | (__a >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __b));
+}
+#endif
+
 /* vec_rlmi */
 #ifdef __POWER9_VECTOR__
 static __inline__ vector unsigned int __ATTRS_o_ai
@@ -7802,8 +7814,24 @@
          vector unsigned long long __c) {
   return __builtin_altivec_vrldmi(__a, __c, __b);
 }
+#endif
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlmi(vector unsigned __int128 __a, vector unsigned __int128 __b,
+         vector unsigned __int128 __c) {
+  return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlmi(vector signed __int128 __a, vector signed __int128 __b,
+         vector signed __int128 __c) {
+  return __builtin_altivec_vrlqmi(__a, __c, __b);
+}
+#endif
 
 /* vec_rlnm */
+#ifdef __POWER9_VECTOR__
 static __inline__ vector unsigned int __ATTRS_o_ai
 vec_rlnm(vector unsigned int __a, vector unsigned int __b,
          vector unsigned int __c) {
@@ -7819,6 +7847,42 @@
 }
 #endif
 
+#ifdef __POWER10_VECTOR__
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b,
+         vector unsigned __int128 __c) {
+  // Merge __b and __c using an appropriate shuffle.
+  vector unsigned char TmpB = (vector unsigned char)__b;
+  vector unsigned char TmpC = (vector unsigned char)__c;
+  vector unsigned char MaskAndShift =
+#ifdef __LITTLE_ENDIAN__
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1,
+                              0, -1, -1, -1, -1, -1);
+#else
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1,
+                              -1, -1, -1, -1, -1, -1, -1);
+#endif
+  return __builtin_altivec_vrlqnm(__a, MaskAndShift);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_rlnm(vector signed __int128 __a, vector signed __int128 __b,
+         vector signed __int128 __c) {
+  // Merge __b and __c using an appropriate shuffle.
+  vector unsigned char TmpB = (vector unsigned char)__b;
+  vector unsigned char TmpC = (vector unsigned char)__c;
+  vector unsigned char MaskAndShift =
+#ifdef __LITTLE_ENDIAN__
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 1,
+                              0, -1, -1, -1, -1, -1);
+#else
+      __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 30, 31, 15, -1,
+                              -1, -1, -1, -1, -1, -1, -1);
+#endif
+  return __builtin_altivec_vrlqnm(__a, MaskAndShift);
+}
+#endif
+
 /* vec_vrlb */
 
 static __inline__ vector signed char __ATTRS_o_ai
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -17,7 +17,7 @@
 vector unsigned int vuia, vuib, vuic;
 vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
-vector signed __int128 vsi128a, vsi128b;
+vector signed __int128 vsi128a, vsi128b, vsi128c;
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
@@ -1157,3 +1157,49 @@
   // CHECK: ret <1 x i128>
   return vec_xl_zext(llb, ullap);
 }
+
+vector signed __int128 test_vec_rl_s128(void) {
+  // CHECK-LABEL: @test_vec_rl_s128(
+  // CHECK: sub <1 x i128>
+  // CHECK-NEXT: lshr <1 x i128>
+  // CHECK-NEXT: or <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rl(vsi128a, vsi128b);
+}
+
+vector unsigned __int128 test_vec_rl_u128(void) {
+  // CHECK-LABEL: @test_vec_rl_u128(
+  // CHECK: sub <1 x i128>
+  // CHECK: lshr <1 x i128>
+  // CHECK: or <1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rl(vui128a, vui128b);
+}
+
+vector signed __int128 test_vec_rlnm_s128(void) {
+  // CHECK-LABEL: @test_vec_rlnm_s128(
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlnm(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlnm_u128(void) {
+  // CHECK-LABEL: @test_vec_rlnm_u128(
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlnm(vui128a, vui128b, vui128c);
+}
+
+vector signed __int128 test_vec_rlmi_s128(void) {
+  // CHECK-LABEL: @test_vec_rlmi_s128(
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlmi(vsi128a, vsi128b, vsi128c);
+}
+
+vector unsigned __int128 test_vec_rlmi_u128(void) {
+  // CHECK-LABEL: @test_vec_rlmi_u128(
+  // CHECK: call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_rlmi(vui128a, vui128b, vui128c);
+}
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1002,6 +1002,15 @@
                               [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
                               [IntrNoMem]>;
 
+def int_ppc_altivec_vrlqnm :
+    PowerPC_Vec_Intrinsic<"vrlqnm", [llvm_v1i128_ty],
+                          [llvm_v1i128_ty, llvm_v1i128_ty],
+                          [IntrNoMem]>;
+def int_ppc_altivec_vrlqmi :
+    PowerPC_Vec_Intrinsic<"vrlqmi", [llvm_v1i128_ty],
+                          [llvm_v1i128_ty, llvm_v1i128_ty, llvm_v1i128_ty],
+                          [IntrNoMem]>;
+
 // Vector Divide Extended Intrinsics.
 def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
 def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -890,6 +890,7 @@
       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
+      setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
     }
 
     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1446,19 +1446,25 @@
                                "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
   def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
                                "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
-  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm", []>;
-  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
-                        (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
-                        "vrlqmi $vD, $vA, $vB", IIC_VecFP, []>,
-               RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
   def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
   def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
   def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
-  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
   def XSCVQPUQZ : X_VT5_XO5_VB5<63, 0, 836, "xscvqpuqz", []>;
   def XSCVQPSQZ : X_VT5_XO5_VB5<63, 8, 836, "xscvqpsqz", []>;
   def XSCVUQQP : X_VT5_XO5_VB5<63, 3, 836, "xscvuqqp", []>;
   def XSCVSQQP : X_VT5_XO5_VB5<63, 11, 836, "xscvsqqp", []>;
+  def VRLQ : VX1_VT5_VA5_VB5<5, "vrlq", []>;
+  def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
+                               [(set v1i128:$vD,
+                                     (int_ppc_altivec_vrlqnm v1i128:$vA,
+                                                             v1i128:$vB))]>;
+  def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
+                        (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+                        "vrlqmi $vD, $vA, $vB", IIC_VecFP,
+                        [(set v1i128:$vD,
+                              (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
+                                                      v1i128:$vDi))]>,
+               RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {
@@ -1510,6 +1516,9 @@
              (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
   def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
              (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
+
+  def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
+            (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {
Index: llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-vector-rotate.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the builtins for vector rotate instructions
+; on Power10.
+
+
+define <1 x i128> @test_vrlq(<1 x i128> %x, <1 x i128> %y) {
+; CHECK-LABEL: test_vrlq:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vrlq v2, v3, v2
+; CHECK-NEXT:    blr
+  %shl.i = shl <1 x i128> %y, %x
+  %sub.i = sub <1 x i128> <i128 128>, %x
+  %lshr.i = lshr <1 x i128> %y, %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_mult8:
+; CHECK:       # %bb.0:
+; CHECK:         vrlq v2, v3, v2
+; CHECK-NEXT:    blr
+  %shl.i = shl <1 x i128> <i128 16>, %x
+  %sub.i = sub <1 x i128> <i128 128>, %x
+  %lshr.i = lshr <1 x i128> <i128 16>, %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+define <1 x i128> @test_vrlq_cost_non_mult8(<1 x i128> %x) {
+; CHECK-LABEL: test_vrlq_cost_non_mult8:
+; CHECK:       # %bb.0:
+; CHECK:         vrlq v2, v3, v2
+; CHECK-NEXT:    blr
+  %shl.i = shl <1 x i128> <i128 17>, %x
+  %sub.i = sub <1 x i128> <i128 128>, %x
+  %lshr.i = lshr <1 x i128> <i128 17>, %sub.i
+  %tmp = or <1 x i128> %shl.i, %lshr.i
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqmi(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqmi:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrlqmi v3, v2, v4
+; CHECK-NEXT:    vmr v2, v3
+; CHECK-NEXT:    blr
+entry:
+  %tmp = tail call <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128> %a, <1 x i128> %c, <1 x i128> %b)
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+define <1 x i128> @test_vrlqnm(<1 x i128> %a, <1 x i128> %b, <1 x i128> %c) {
+; CHECK-LABEL: test_vrlqnm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vrlqnm v2, v2, v3
+; CHECK-NEXT:    xxland v2, v2, v4
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128> %a, <1 x i128> %b)
+  %tmp = and <1 x i128> %0, %c
+  ret <1 x i128> %tmp
+}
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqmi(<1 x i128>, <1 x i128>, <1 x i128>)
+
+; Function Attrs: nounwind readnone
+declare <1 x i128> @llvm.ppc.altivec.vrlqnm(<1 x i128>, <1 x i128>)
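
For illustration, here is a minimal usage sketch that is not part of the patch. It shows how the new overloads are reached from C, assuming a Power10 target (e.g. clang -mcpu=pwr10) so that __POWER10_VECTOR__ is defined; the function names rotate128 and rotate_then_mask128 are hypothetical.

#include <altivec.h>

// Rotate each 128-bit element of v left by the amount in amt. vec_rl
// expands to the shl/lshr/or idiom added to altivec.h above; the DAG
// combiner recognizes that idiom as ROTL, which the new ISel pattern
// selects as a single vrlq.
vector unsigned __int128 rotate128(vector unsigned __int128 v,
                                   vector unsigned __int128 amt) {
  return vec_rl(v, amt);
}

// vec_rlmi lowers to the vrlqmi intrinsic (rotate left, then insert
// under mask), and vec_rlnm lowers to vrlqnm after altivec.h merges
// its second and third operands into the mask-and-shift operand.
vector unsigned __int128 rotate_then_mask128(vector unsigned __int128 a,
                                             vector unsigned __int128 b,
                                             vector unsigned __int128 c) {
  vector unsigned __int128 t = vec_rlmi(a, b, c); // emits vrlqmi
  return vec_rlnm(t, b, c);                       // emits vrlqnm
}

The same shl/sub/lshr/or sequence appears in p10-vector-rotate.ll above: it is the open-coded form of a 128-bit rotate, which DAGCombine canonicalizes to ROTL before the vrlq pattern fires.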