[PowerPC] Power10 vector extract-with-mask builtins

Adds the __builtin_altivec_vextract{b,h,w,d,q}m builtins, the vec_extractm
overloads in altivec.h (inside the __POWER10_VECTOR__ section), the matching
llvm.ppc.altivec.vextract{b,h,w,d,q}m intrinsics, the VEXTRACT{B,H,W,D,Q}M
instruction definitions (inside the IsISA3_1 predicate block), and Clang
codegen, llc, assembler and disassembler tests.

NOTE(review): the line structure of this patch was rebuilt from the hunk
headers' line counts. Indentation and column alignment inside each line were
not recoverable and follow the usual conventions of each touched file. If a
context line does not match the tree byte-for-byte, apply with
"git apply --ignore-whitespace" (or "patch -l").

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -298,6 +298,13 @@
 BUILTIN(__builtin_altivec_vrlwnm, "V4UiV4UiV4Ui", "")
 BUILTIN(__builtin_altivec_vrldnm, "V2ULLiV2ULLiV2ULLi", "")
 
+// P10 Vector Extract with Mask built-ins.
+BUILTIN(__builtin_altivec_vextractbm, "UiV16Uc", "")
+BUILTIN(__builtin_altivec_vextracthm, "UiV8Us", "")
+BUILTIN(__builtin_altivec_vextractwm, "UiV4Ui", "")
+BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "")
+BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "")
+
 // P10 Vector Parallel Bits built-ins.
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16763,6 +16763,34 @@
 }
 
 #ifdef __POWER10_VECTOR__
+
+/* vec_extractm */
+
+static __inline__ unsigned int __ATTRS_o_ai
+vec_extractm(vector unsigned char __a) {
+  return __builtin_altivec_vextractbm(__a);
+}
+
+static __inline__ unsigned int __ATTRS_o_ai
+vec_extractm(vector unsigned short __a) {
+  return __builtin_altivec_vextracthm(__a);
+}
+
+static __inline__ unsigned int __ATTRS_o_ai
+vec_extractm(vector unsigned int __a) {
+  return __builtin_altivec_vextractwm(__a);
+}
+
+static __inline__ unsigned int __ATTRS_o_ai
+vec_extractm(vector unsigned long long __a) {
+  return __builtin_altivec_vextractdm(__a);
+}
+
+static __inline__ unsigned int __ATTRS_o_ai
+vec_extractm(vector unsigned __int128 __a) {
+  return __builtin_altivec_vextractqm(__a);
+}
+
 /* vec_pdep */
 
 static __inline__ vector unsigned long long __ATTRS_o_ai
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -13,6 +13,36 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 unsigned int uia;
 
+unsigned int test_vec_extractm_uc(void) {
+  // CHECK: @llvm.ppc.altivec.vextractbm(<16 x i8>
+  // CHECK-NEXT: ret i32
+  return vec_extractm(vuca);
+}
+
+unsigned int test_vec_extractm_us(void) {
+  // CHECK: @llvm.ppc.altivec.vextracthm(<8 x i16>
+  // CHECK-NEXT: ret i32
+  return vec_extractm(vusa);
+}
+
+unsigned int test_vec_extractm_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vextractwm(<4 x i32>
+  // CHECK-NEXT: ret i32
+  return vec_extractm(vuia);
+}
+
+unsigned int test_vec_extractm_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vextractdm(<2 x i64>
+  // CHECK-NEXT: ret i32
+  return vec_extractm(vulla);
+}
+
+unsigned int test_vec_extractm_u128(void) {
+  // CHECK: @llvm.ppc.altivec.vextractqm(<1 x i128>
+  // CHECK-NEXT: ret i32
+  return vec_extractm(vui128a);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -423,6 +423,18 @@
   def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
               Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
 
+  // P10 Vector Extract with Mask
+  def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">,
+              Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextracthm : GCCBuiltin<"__builtin_altivec_vextracthm">,
+              Intrinsic<[llvm_i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextractwm : GCCBuiltin<"__builtin_altivec_vextractwm">,
+              Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextractdm : GCCBuiltin<"__builtin_altivec_vextractdm">,
+              Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
+              Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
+
   // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
   def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -566,6 +566,26 @@
 }
 
 let Predicates = [IsISA3_1] in {
+  def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB),
+                                      "vextractbm $rD, $vB", IIC_VecGeneral,
+                                      [(set i32:$rD,
+                                            (int_ppc_altivec_vextractbm v16i8:$vB))]>;
+  def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB),
+                                      "vextracthm $rD, $vB", IIC_VecGeneral,
+                                      [(set i32:$rD,
+                                            (int_ppc_altivec_vextracthm v8i16:$vB))]>;
+  def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB),
+                                      "vextractwm $rD, $vB", IIC_VecGeneral,
+                                      [(set i32:$rD,
+                                            (int_ppc_altivec_vextractwm v4i32:$vB))]>;
+  def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB),
+                                      "vextractdm $rD, $vB", IIC_VecGeneral,
+                                      [(set i32:$rD,
+                                            (int_ppc_altivec_vextractdm v2i64:$vB))]>;
+  def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB),
+                                      "vextractqm $rD, $vB", IIC_VecGeneral,
+                                      [(set i32:$rD,
+                                            (int_ppc_altivec_vextractqm v1i128:$vB))]>;
    def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                          "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
                          [(set v2i64:$vD,
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN:   FileCheck %s
+
+; This test case aims to test the vector mask manipulation operations
+; on Power10.
+
+define i32 @test_vextractbm(<16 x i8> %a) {
+; CHECK-LABEL: test_vextractbm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractbm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractbm(<16 x i8> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextracthm(<8 x i16> %a) {
+; CHECK-LABEL: test_vextracthm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextracthm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextracthm(<8 x i16> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextractwm(<4 x i32> %a) {
+; CHECK-LABEL: test_vextractwm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractwm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractwm(<4 x i32> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextractdm(<2 x i64> %a) {
+; CHECK-LABEL: test_vextractdm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractdm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractdm(<2 x i64> %a)
+  ret i32 %ext
+}
+
+define i32 @test_vextractqm(<1 x i128> %a) {
+; CHECK-LABEL: test_vextractqm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vextractqm r3, v2
+; CHECK-NEXT:    blr
+entry:
+  %ext = tail call i32 @llvm.ppc.altivec.vextractqm(<1 x i128> %a)
+  ret i32 %ext
+}
+
+declare i32 @llvm.ppc.altivec.vextractbm(<16 x i8>)
+declare i32 @llvm.ppc.altivec.vextracthm(<8 x i16>)
+declare i32 @llvm.ppc.altivec.vextractwm(<4 x i32>)
+declare i32 @llvm.ppc.altivec.vextractdm(<2 x i64>)
+declare i32 @llvm.ppc.altivec.vextractqm(<1 x i128>)
diff --git a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
--- a/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -1,6 +1,21 @@
 # RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-linux-gnu \
 # RUN:   -mcpu=pwr10 | FileCheck %s
 
+# CHECK: vextractbm 1, 2
+0x10 0x28 0x16 0x42
+
+# CHECK: vextracthm 1, 2
+0x10 0x29 0x16 0x42
+
+# CHECK: vextractwm 1, 2
+0x10 0x2a 0x16 0x42
+
+# CHECK: vextractdm 1, 2
+0x10 0x2b 0x16 0x42
+
+# CHECK: vextractqm 1, 2
+0x10 0x2c 0x16 0x42
+
 # CHECK: vpdepd 1, 2, 0
 0x10 0x22 0x05 0xcd
 
diff --git a/llvm/test/MC/PowerPC/p10.s b/llvm/test/MC/PowerPC/p10.s
--- a/llvm/test/MC/PowerPC/p10.s
+++ b/llvm/test/MC/PowerPC/p10.s
@@ -3,6 +3,21 @@
 # RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | \
 # RUN:   FileCheck -check-prefix=CHECK-LE %s
 
+# CHECK-BE: vextractbm 1, 2                   # encoding: [0x10,0x28,0x16,0x42]
+# CHECK-LE: vextractbm 1, 2                   # encoding: [0x42,0x16,0x28,0x10]
+            vextractbm 1, 2
+# CHECK-BE: vextracthm 1, 2                   # encoding: [0x10,0x29,0x16,0x42]
+# CHECK-LE: vextracthm 1, 2                   # encoding: [0x42,0x16,0x29,0x10]
+            vextracthm 1, 2
+# CHECK-BE: vextractwm 1, 2                   # encoding: [0x10,0x2a,0x16,0x42]
+# CHECK-LE: vextractwm 1, 2                   # encoding: [0x42,0x16,0x2a,0x10]
+            vextractwm 1, 2
+# CHECK-BE: vextractdm 1, 2                   # encoding: [0x10,0x2b,0x16,0x42]
+# CHECK-LE: vextractdm 1, 2                   # encoding: [0x42,0x16,0x2b,0x10]
+            vextractdm 1, 2
+# CHECK-BE: vextractqm 1, 2                   # encoding: [0x10,0x2c,0x16,0x42]
+# CHECK-LE: vextractqm 1, 2                   # encoding: [0x42,0x16,0x2c,0x10]
+            vextractqm 1, 2
 # CHECK-BE: vpdepd 1, 2, 0                    # encoding: [0x10,0x22,0x05,0xcd]
 # CHECK-LE: vpdepd 1, 2, 0                    # encoding: [0xcd,0x05,0x22,0x10]
             vpdepd 1, 2, 0