diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -315,6 +315,8 @@ BUILTIN(__builtin_altivec_vdiveuw, "V4UiV4UiV4Ui", "") BUILTIN(__builtin_altivec_vdivesd, "V2LLiV2LLiV2LLi", "") BUILTIN(__builtin_altivec_vdiveud, "V2ULLiV2ULLiV2ULLi", "") +BUILTIN(__builtin_altivec_vdivesq, "V1SLLLiV1SLLLiV1SLLLi", "") +BUILTIN(__builtin_altivec_vdiveuq, "V1ULLLiV1ULLLiV1ULLLi", "") // P10 Vector Multiply High built-ins. BUILTIN(__builtin_altivec_vmulhsw, "V4SiV4SiV4Si", "") diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -3366,6 +3366,16 @@ vec_dive(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vdiveud(__a, __b); } + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_dive(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return __builtin_altivec_vdiveuq(__a, __b); +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_dive(vector signed __int128 __a, vector signed __int128 __b) { + return __builtin_altivec_vdivesq(__a, __b); +} #endif #ifdef __POWER10_VECTOR__ diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c --- a/clang/test/CodeGen/builtins-ppc-p10vector.c +++ b/clang/test/CodeGen/builtins-ppc-p10vector.c @@ -106,6 +106,18 @@ return vec_dive(vulla, vullb); } +vector unsigned __int128 test_vec_dive_u128(void) { + // CHECK: @llvm.ppc.altivec.vdiveuq(<1 x i128> %{{.+}}, <1 x i128> %{{.+}}) + // CHECK-NEXT: ret <1 x i128> + return vec_dive(vui128a, vui128b); +} + +vector signed __int128 test_vec_dive_s128(void) { + // CHECK: @llvm.ppc.altivec.vdivesq(<1 x i128> %{{.+}}, <1 x i128> %{{.+}}) + // CHECK-NEXT: ret <1 x i128> + return vec_dive(vsi128a, vsi128b); +} + vector signed int test_vec_mulh_si(void) { // CHECK: 
@llvm.ppc.altivec.vmulhsw(<4 x i32> %{{.+}}, <4 x i32> %{{.+}}) // CHECK-NEXT: ret <4 x i32> diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -981,6 +981,8 @@ def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">; def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">; def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">; +def int_ppc_altivec_vdivesq : PowerPC_Vec_QQQ_Intrinsic<"vdivesq">; +def int_ppc_altivec_vdiveuq : PowerPC_Vec_QQQ_Intrinsic<"vdiveuq">; // Vector Multiply High Intrinsics. def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">; diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -1291,9 +1291,13 @@ "vdivuq $vD, $vA, $vB", IIC_VecGeneral, [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>; def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdivesq $vD, $vA, $vB", IIC_VecGeneral, []>; + "vdivesq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA, + v1i128:$vB))]>; def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), - "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, []>; + "vdiveuq $vD, $vA, $vB", IIC_VecGeneral, + [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA, + v1i128:$vB))]>; def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>; def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>; def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>; diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll --- a/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll +++ b/llvm/test/CodeGen/PowerPC/p10-vector-divide.ll @@ -9,6 +9,7 @@ ; This test case aims to test the vector 
divide instructions on Power10. ; This includes the low order and extended versions of vector divide, ; that operate on signed and unsigned words and doublewords. +; This also includes 128-bit vector divide instructions. define <2 x i64> @test_vdivud(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test_vdivud: @@ -113,3 +114,25 @@ %div = tail call <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64> %a, <2 x i64> %b) ret <2 x i64> %div } + +declare <1 x i128> @llvm.ppc.altivec.vdivesq(<1 x i128>, <1 x i128>) nounwind readnone +declare <1 x i128> @llvm.ppc.altivec.vdiveuq(<1 x i128>, <1 x i128>) nounwind readnone + +define <1 x i128> @test_vdivesq(<1 x i128> %x, <1 x i128> %y) nounwind readnone { +; CHECK-LABEL: test_vdivesq: +; CHECK: # %bb.0: +; CHECK-NEXT: vdivesq v2, v2, v3 +; CHECK-NEXT: blr + %tmp = tail call <1 x i128> @llvm.ppc.altivec.vdivesq(<1 x i128> %x, <1 x i128> %y) + ret <1 x i128> %tmp +} + + +define <1 x i128> @test_vdiveuq(<1 x i128> %x, <1 x i128> %y) nounwind readnone { +; CHECK-LABEL: test_vdiveuq: +; CHECK: # %bb.0: +; CHECK-NEXT: vdiveuq v2, v2, v3 +; CHECK-NEXT: blr + %tmp = call <1 x i128> @llvm.ppc.altivec.vdiveuq(<1 x i128> %x, <1 x i128> %y) + ret <1 x i128> %tmp +}