# Patch summary: wire up the P10 "Vector Expand with Mask" operations
# (vexpandbm / vexpandhm / vexpandwm / vexpanddm / vexpandqm) from the
# user-facing header down to instruction selection.
#
#   clang/include/clang/Basic/BuiltinsPPC.def
#       Registers five __builtin_altivec_vexpand{b,h,w,d,q}m builtins; each
#       signature string names the same vector type for result and argument.
#   clang/lib/Headers/altivec.h
#       Adds five vec_expandm overloads (vector unsigned char / short / int /
#       long long / __int128); each forwards its argument to the matching
#       builtin and returns the result.
#   clang/test/CodeGen/builtins-ppc-p10vector.c
#       Adds one test per overload checking that a call to the matching
#       @llvm.ppc.altivec.vexpand*m intrinsic is emitted, followed by a ret
#       of the same vector type.
#   llvm/include/llvm/IR/IntrinsicsPowerPC.td
#       Defines int_ppc_altivec_vexpand{b,h,w,d,q}m, tied to the builtin names
#       via GCCBuiltin and marked IntrNoMem.
#   llvm/lib/Target/PowerPC/PPCInstrPrefix.td
#       Replaces the empty pattern list ([]) on VEXPANDBM..VEXPANDQM with a
#       pattern that selects each instruction from its intrinsic.
#   llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
#       Adds one function per intrinsic; each expects a single
#       "vexpand*m v2, v2" followed by "blr".
#
# NOTE(review): line breaks and indentation below were restored from a
# whitespace-collapsed copy of this patch. Hunk line counts match the @@
# headers, but leading whitespace on context lines is reconstructed by
# convention -- confirm against the target tree (or apply with
# --ignore-whitespace) before relying on it.
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -305,6 +305,13 @@
 BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "")
 BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "")
 
+// P10 Vector Expand with Mask built-ins.
+BUILTIN(__builtin_altivec_vexpandbm, "V16UcV16Uc", "")
+BUILTIN(__builtin_altivec_vexpandhm, "V8UsV8Us", "")
+BUILTIN(__builtin_altivec_vexpandwm, "V4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vexpanddm, "V2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vexpandqm, "V1ULLLiV1ULLLi", "")
+
 // P10 Vector Parallel Bits built-ins.
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -16843,6 +16843,33 @@
   return __builtin_altivec_vextractqm(__a);
 }
 
+/* vec_expandm */
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_expandm(vector unsigned char __a) {
+  return __builtin_altivec_vexpandbm(__a);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_expandm(vector unsigned short __a) {
+  return __builtin_altivec_vexpandhm(__a);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_expandm(vector unsigned int __a) {
+  return __builtin_altivec_vexpandwm(__a);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_expandm(vector unsigned long long __a) {
+  return __builtin_altivec_vexpanddm(__a);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_expandm(vector unsigned __int128 __a) {
+  return __builtin_altivec_vexpandqm(__a);
+}
+
 /* vec_pdep */
 
 static __inline__ vector unsigned long long __ATTRS_o_ai
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -133,6 +133,36 @@
   return vec_extractm(vui128a);
 }
 
+vector unsigned char test_vexpandm_uc(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandbm(<16 x i8> %{{.+}})
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_expandm(vuca);
+}
+
+vector unsigned short test_vexpandm_us(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandhm(<8 x i16> %{{.+}})
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_expandm(vusa);
+}
+
+vector unsigned int test_vexpandm_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandwm(<4 x i32> %{{.+}})
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_expandm(vuia);
+}
+
+vector unsigned long long test_vexpandm_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vexpanddm(<2 x i64> %{{.+}})
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_expandm(vulla);
+}
+
+vector unsigned __int128 test_vexpandm_u128(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandqm(<1 x i128> %{{.+}})
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_expandm(vui128a);
+}
+
 unsigned long long test_vgnb_1(void) {
   // CHECK: @llvm.ppc.altivec.vgnb(<1 x i128> %{{.+}}, i32 2)
   // CHECK-NEXT: ret i64
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -441,6 +441,18 @@
   def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
               Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
 
+  // P10 Vector Expand with Mask
+  def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">,
+              Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">,
+              Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">,
+              Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">,
+              Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>;
+
   // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
   def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
               Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -877,19 +877,24 @@
                                       (int_ppc_altivec_vextractqm v1i128:$vB))]>;
   def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB),
                                      "vexpandbm $vD, $vB", IIC_VecGeneral,
-                                     []>;
+                                     [(set v16i8:$vD, (int_ppc_altivec_vexpandbm
+                                       v16i8:$vB))]>;
   def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB),
                                      "vexpandhm $vD, $vB", IIC_VecGeneral,
-                                     []>;
+                                     [(set v8i16:$vD, (int_ppc_altivec_vexpandhm
+                                       v8i16:$vB))]>;
   def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB),
                                      "vexpandwm $vD, $vB", IIC_VecGeneral,
-                                     []>;
+                                     [(set v4i32:$vD, (int_ppc_altivec_vexpandwm
+                                       v4i32:$vB))]>;
   def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB),
                                      "vexpanddm $vD, $vB", IIC_VecGeneral,
-                                     []>;
+                                     [(set v2i64:$vD, (int_ppc_altivec_vexpanddm
+                                       v2i64:$vB))]>;
   def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB),
                                      "vexpandqm $vD, $vB", IIC_VecGeneral,
-                                     []>;
+                                     [(set v1i128:$vD, (int_ppc_altivec_vexpandqm
+                                       v1i128:$vB))]>;
   def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB),
                                    "mtvsrbm $vD, $rB", IIC_VecGeneral,
                                    []>;
diff --git a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
--- a/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
@@ -64,3 +64,59 @@
   %ext = tail call i32 @llvm.ppc.altivec.vextractqm(<1 x i128> %a)
   ret i32 %ext
 }
+
+declare <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8>)
+declare <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16>)
+declare <4 x i32> @llvm.ppc.altivec.vexpandwm(<4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64>)
+declare <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128>)
+
+define <16 x i8> @test_vexpandbm(<16 x i8> %a) {
+; CHECK-LABEL: test_vexpandbm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandbm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8> %a)
+  ret <16 x i8> %exp
+}
+
+define <8 x i16> @test_vexpandhm(<8 x i16> %a) {
+; CHECK-LABEL: test_vexpandhm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandhm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16> %a)
+  ret <8 x i16> %exp
+}
+
+define <4 x i32> @test_vexpandwm(<4 x i32> %a) {
+; CHECK-LABEL: test_vexpandwm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandwm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <4 x i32> @llvm.ppc.altivec.vexpandwm(<4 x i32> %a)
+  ret <4 x i32> %exp
+}
+
+define <2 x i64> @test_vexpanddm(<2 x i64> %a) {
+; CHECK-LABEL: test_vexpanddm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpanddm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64> %a)
+  ret <2 x i64> %exp
+}
+
+define <1 x i128> @test_vexpandqm(<1 x i128> %a) {
+; CHECK-LABEL: test_vexpandqm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandqm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128> %a)
+  ret <1 x i128> %exp
+}