Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -298,6 +298,18 @@
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
 
+// P10 Vector Divide Extended built-ins.
+BUILTIN(__builtin_altivec_vdivesw, "V4SiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vdiveuw, "V4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vdivesd, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_altivec_vdiveud, "V2ULLiV2ULLiV2ULLi", "")
+
+// P10 Vector Multiply High built-ins.
+BUILTIN(__builtin_altivec_vmulhsw, "V4SiV4SiV4Si", "")
+BUILTIN(__builtin_altivec_vmulhuw, "V4UiV4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vmulhsd, "V2LLiV2LLiV2LLi", "")
+BUILTIN(__builtin_altivec_vmulhud, "V2ULLiV2ULLiV2ULLi", "")
+
 // P10 Vector Parallel Bits built-ins.
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -3288,6 +3288,30 @@
 }
 #endif
 
+/* vec_dive */
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_dive(vector signed int __a, vector signed int __b) {
+  return __builtin_altivec_vdivesw(__a, __b);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_dive(vector unsigned int __a, vector unsigned int __b) {
+  return __builtin_altivec_vdiveuw(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_dive(vector signed long long __a, vector signed long long __b) {
+  return __builtin_altivec_vdivesd(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_dive(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vdiveud(__a, __b);
+}
+#endif
+
 /* vec_dss */
 
 #define vec_dss __builtin_altivec_dss
@@ -5737,6 +5761,30 @@
 #endif
 }
 
+/* vec_mulh */
+
+#ifdef __POWER10_VECTOR__
+static __inline__ vector signed int __ATTRS_o_ai
+vec_mulh(vector signed int __a, vector signed int __b) {
+  return __builtin_altivec_vmulhsw(__a, __b);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_mulh(vector unsigned int __a, vector unsigned int __b) {
+  return __builtin_altivec_vmulhuw(__a, __b);
+}
+
+static __inline__ vector signed long long __ATTRS_o_ai
+vec_mulh(vector signed long long __a, vector signed long long __b) {
+  return __builtin_altivec_vmulhsd(__a, __b);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_mulh(vector unsigned long long __a, vector unsigned long long __b) {
+  return __builtin_altivec_vmulhud(__a, __b);
+}
+#endif
+
 /* vec_mulo */
 
 static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a,
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -74,6 +74,54 @@
   return vec_mod(vulla, vullb);
 }
 
+vector signed int test_vec_dive_si(void) {
+  // CHECK: @llvm.ppc.altivec.vdivesw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_dive(vsia, vsib);
+}
+
+vector unsigned int test_vec_dive_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vdiveuw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_dive(vuia, vuib);
+}
+
+vector signed long long test_vec_dive_sll(void) {
+  // CHECK: @llvm.ppc.altivec.vdivesd(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_dive(vslla, vsllb);
+}
+
+vector unsigned long long test_vec_dive_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vdiveud(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_dive(vulla, vullb);
+}
+
+vector signed int test_vec_mulh_si(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhsw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_mulh(vsia, vsib);
+}
+
+vector unsigned int test_vec_mulh_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhuw(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_mulh(vuia, vuib);
+}
+
+vector signed long long test_vec_mulh_sll(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhsd(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_mulh(vslla, vsllb);
+}
+
+vector unsigned long long test_vec_mulh_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vmulhud(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_mulh(vulla, vullb);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -800,6 +800,18 @@
                             [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
                             [IntrNoMem]>;
 
+// Vector Divide Extended Intrinsics.
+def int_ppc_altivec_vdivesw : PowerPC_Vec_WWW_Intrinsic<"vdivesw">;
+def int_ppc_altivec_vdiveuw : PowerPC_Vec_WWW_Intrinsic<"vdiveuw">;
+def int_ppc_altivec_vdivesd : PowerPC_Vec_DDD_Intrinsic<"vdivesd">;
+def int_ppc_altivec_vdiveud : PowerPC_Vec_DDD_Intrinsic<"vdiveud">;
+
+// Vector Multiply High Intrinsics.
+def int_ppc_altivec_vmulhsw : PowerPC_Vec_WWW_Intrinsic<"vmulhsw">;
+def int_ppc_altivec_vmulhuw : PowerPC_Vec_WWW_Intrinsic<"vmulhuw">;
+def int_ppc_altivec_vmulhsd : PowerPC_Vec_DDD_Intrinsic<"vmulhsd">;
+def int_ppc_altivec_vmulhud : PowerPC_Vec_DDD_Intrinsic<"vmulhud">;
+
 //===----------------------------------------------------------------------===//
 // PowerPC VSX Intrinsic Definitions.
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -607,6 +607,22 @@
   def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vdivud $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
+  def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vdivesw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD,
+                               (int_ppc_altivec_vdivesw v4i32:$vA, v4i32:$vB))]>;
+  def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vdiveuw $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v4i32:$vD,
+                               (int_ppc_altivec_vdiveuw v4i32:$vA, v4i32:$vB))]>;
+  def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vdivesd $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD,
+                               (int_ppc_altivec_vdivesd v2i64:$vA, v2i64:$vB))]>;
+  def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+                         "vdiveud $vD, $vA, $vB", IIC_VecGeneral,
+                         [(set v2i64:$vD,
+                               (int_ppc_altivec_vdiveud v2i64:$vA, v2i64:$vB))]>;
 }
 
 //---------------------------- Anonymous Patterns ----------------------------//
@@ -619,4 +635,14 @@
             (v4i32 (COPY_TO_REGCLASS (XXGENPCVWM $VRB, imm:$IMM), VRRC))>;
   def : Pat<(v2i64 (int_ppc_vsx_xxgenpcvdm v2i64:$VRB, imm:$IMM)),
             (v2i64 (COPY_TO_REGCLASS (XXGENPCVDM $VRB, imm:$IMM), VRRC))>;
+
+  // Exploit the vector multiply high instructions using intrinsics.
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhsw v4i32:$vA, v4i32:$vB)),
+            (v4i32 (VMULHSW $vA, $vB))>;
+  def : Pat<(v4i32 (int_ppc_altivec_vmulhuw v4i32:$vA, v4i32:$vB)),
+            (v4i32 (VMULHUW $vA, $vB))>;
+  def : Pat<(v2i64 (int_ppc_altivec_vmulhsd v2i64:$vA, v2i64:$vB)),
+            (v2i64 (VMULHSD $vA, $vB))>;
+  def : Pat<(v2i64 (int_ppc_altivec_vmulhud v2i64:$vA, v2i64:$vB)),
+            (v2i64 (VMULHUD $vA, $vB))>;
 }
Index: llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-divide.ll
@@ -46,3 +46,49 @@
   %div = sdiv <4 x i32> %a, %b
   ret <4 x i32> %div
 }
+
+; Test the vector divide extended intrinsics.
+declare <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @test_vdivesw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdivesw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdivesw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %div = tail call <4 x i32> @llvm.ppc.altivec.vdivesw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %div
+}
+
+define <4 x i32> @test_vdiveuw(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vdiveuw:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdiveuw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %div = tail call <4 x i32> @llvm.ppc.altivec.vdiveuw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %div
+}
+
+define <2 x i64> @test_vdivesd(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdivesd:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdivesd v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %div = tail call <2 x i64> @llvm.ppc.altivec.vdivesd(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %div
+}
+
+define <2 x i64> @test_vdiveud(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vdiveud:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vdiveud v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %div = tail call <2 x i64> @llvm.ppc.altivec.vdiveud(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %div
+}
Index: llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-multiply.ll
@@ -72,3 +72,49 @@
   %tr = trunc <4 x i64> %shr to <4 x i32>
   ret <4 x i32> %tr
 }
+
+; Test the vector multiply high intrinsics.
+declare <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @test_vmulhsw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhsw_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhsw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhsw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %mulh
+}
+
+define <4 x i32> @test_vmulhuw_intrinsic(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmulhuw_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhuw v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %mulh = tail call <4 x i32> @llvm.ppc.altivec.vmulhuw(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %mulh
+}
+
+define <2 x i64> @test_vmulhsd_intrinsic(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhsd_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhsd v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhsd(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %mulh
+}
+
+define <2 x i64> @test_vmulhud_intrinsic(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmulhud_intrinsic:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmulhud v2, v2, v3
+; CHECK-NEXT: blr
+entry:
+  %mulh = tail call <2 x i64> @llvm.ppc.altivec.vmulhud(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %mulh
+}
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -81,3 +81,15 @@
 
 # CHECK: vmulhud 1, 2, 3
 0x10 0x22 0x1a 0xc9
+
+# CHECK: vdivesw 21, 11, 10
+0x12 0xab 0x53 0x8b
+
+# CHECK: vdiveuw 21, 11, 10
+0x12 0xab 0x52 0x8b
+
+# CHECK: vdivesd 21, 11, 10
+0x12 0xab 0x53 0xcb
+
+# CHECK: vdiveud 21, 11, 10
+0x12 0xab 0x52 0xcb
Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -84,3 +84,15 @@
 # CHECK-BE: vmulhud 1, 2, 3                # encoding: [0x10,0x22,0x1a,0xc9]
 # CHECK-LE: vmulhud 1, 2, 3                # encoding: [0xc9,0x1a,0x22,0x10]
             vmulhud 1, 2, 3
+# CHECK-BE: vdivesw 21, 11, 10             # encoding: [0x12,0xab,0x53,0x8b]
+# CHECK-LE: vdivesw 21, 11, 10             # encoding: [0x8b,0x53,0xab,0x12]
+            vdivesw 21, 11, 10
+# CHECK-BE: vdiveuw 21, 11, 10             # encoding: [0x12,0xab,0x52,0x8b]
+# CHECK-LE: vdiveuw 21, 11, 10             # encoding: [0x8b,0x52,0xab,0x12]
+            vdiveuw 21, 11, 10
+# CHECK-BE: vdivesd 21, 11, 10             # encoding: [0x12,0xab,0x53,0xcb]
+# CHECK-LE: vdivesd 21, 11, 10             # encoding: [0xcb,0x53,0xab,0x12]
+            vdivesd 21, 11, 10
+# CHECK-BE: vdiveud 21, 11, 10             # encoding: [0x12,0xab,0x52,0xcb]
+# CHECK-LE: vdiveud 21, 11, 10             # encoding: [0xcb,0x52,0xab,0x12]
+            vdiveud 21, 11, 10
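
For reference, a minimal usage sketch of the two overload families this patch adds to altivec.h. This is not part of the diff: the wrapper names are invented for illustration, and it assumes a compiler carrying this change with a Power10 target (for example -mcpu=pwr10):

#include <altivec.h>

// Divide extended: for the word overloads each dividend element is the
// corresponding element of 'a' shifted left 32 bits (64 bits for the
// doubleword overloads); lowers to vdivesw/vdiveuw/vdivesd/vdiveud.
vector unsigned int dive_u32(vector unsigned int a, vector unsigned int b) {
  return vec_dive(a, b);
}

// Multiply high: each result element is the high half of the full-width
// element-wise product; lowers to vmulhsw/vmulhuw/vmulhsd/vmulhud.
vector signed long long mulh_s64(vector signed long long a,
                                 vector signed long long b) {
  return vec_mulh(a, b);
}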