Summary: add the P10 Vector Expand with Mask built-ins (vexpand{b,h,w,d,q}m).

Files touched by this patch:
    clang/include/clang/Basic/BuiltinsPPC.def
        declares __builtin_altivec_vexpand{b,h,w,d,q}m, each taking and
        returning the same unsigned vector type.
    clang/lib/Headers/altivec.h
        adds five vec_expandm overloads (unsigned char/short/int/long long/
        __int128 vectors); each one forwards to the matching builtin.
    llvm/include/llvm/IR/IntrinsicsPowerPC.td
        defines int_ppc_altivec_vexpand{b,h,w,d,q}m, all marked IntrNoMem.
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
        defines VEXPAND{B,H,W,D,Q}M as VXForm_RD5_XO5_RS5<1602, 0..4>, with
        selection patterns on the intrinsics above.
    clang/test/CodeGen/builtins-ppc-p10vector.c
    llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
    llvm/test/MC/Disassembler/PowerPC/p10insts.txt
    llvm/test/MC/PowerPC/p10.s
        front-end, codegen, disassembler and assembler (BE/LE) coverage.

NOTE(review): the line structure of this patch was restored from a copy whose
newlines and indentation had been collapsed. Hunk line counts match the @@
headers, but leading whitespace on context lines is reconstructed; if a hunk
fails to match, apply with `patch -l` (ignore whitespace) or regenerate.

Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -305,6 +305,13 @@
 BUILTIN(__builtin_altivec_vextractdm, "UiV2ULLi", "")
 BUILTIN(__builtin_altivec_vextractqm, "UiV1ULLLi", "")
 
+// P10 Vector Expand with Mask built-ins.
+BUILTIN(__builtin_altivec_vexpandbm, "V16UcV16Uc", "")
+BUILTIN(__builtin_altivec_vexpandhm, "V8UsV8Us", "")
+BUILTIN(__builtin_altivec_vexpandwm, "V4UiV4Ui", "")
+BUILTIN(__builtin_altivec_vexpanddm, "V2ULLiV2ULLi", "")
+BUILTIN(__builtin_altivec_vexpandqm, "V1ULLLiV1ULLLi", "")
+
 // P10 Vector Parallel Bits built-ins.
 BUILTIN(__builtin_altivec_vpdepd, "V2ULLiV2ULLiV2ULLi", "")
 BUILTIN(__builtin_altivec_vpextd, "V2ULLiV2ULLiV2ULLi", "")
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16791,6 +16791,33 @@
   return __builtin_altivec_vextractqm(__a);
 }
 
+/* vec_expandm */
+
+static __inline__ vector unsigned char __ATTRS_o_ai
+vec_expandm(vector unsigned char __a) {
+  return __builtin_altivec_vexpandbm(__a);
+}
+
+static __inline__ vector unsigned short __ATTRS_o_ai
+vec_expandm(vector unsigned short __a) {
+  return __builtin_altivec_vexpandhm(__a);
+}
+
+static __inline__ vector unsigned int __ATTRS_o_ai
+vec_expandm(vector unsigned int __a) {
+  return __builtin_altivec_vexpandwm(__a);
+}
+
+static __inline__ vector unsigned long long __ATTRS_o_ai
+vec_expandm(vector unsigned long long __a) {
+  return __builtin_altivec_vexpanddm(__a);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_expandm(vector unsigned __int128 __a) {
+  return __builtin_altivec_vexpandqm(__a);
+}
+
 /* vec_pdep */
 
 static __inline__ vector unsigned long long __ATTRS_o_ai
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -43,6 +43,36 @@
   return vec_extractm(vui128a);
 }
 
+vector unsigned char test_vexpandm_uc(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandbm(<16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_expandm(vuca);
+}
+
+vector unsigned short test_vexpandm_us(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandhm(<8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_expandm(vusa);
+}
+
+vector unsigned int test_vexpandm_ui(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandwm(<4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_expandm(vuia);
+}
+
+vector unsigned long long test_vexpandm_ull(void) {
+  // CHECK: @llvm.ppc.altivec.vexpanddm(<2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_expandm(vulla);
+}
+
+vector unsigned __int128 test_vexpandm_u128(void) {
+  // CHECK: @llvm.ppc.altivec.vexpandqm(<1 x i128>
+  // CHECK-NEXT: ret <1 x i128>
+  return vec_expandm(vui128a);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -435,6 +435,18 @@
   def int_ppc_altivec_vextractqm : GCCBuiltin<"__builtin_altivec_vextractqm">,
       Intrinsic<[llvm_i32_ty], [llvm_v1i128_ty], [IntrNoMem]>;
 
+  // P10 Vector Expand with Mask
+  def int_ppc_altivec_vexpandbm : GCCBuiltin<"__builtin_altivec_vexpandbm">,
+      Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpandhm : GCCBuiltin<"__builtin_altivec_vexpandhm">,
+      Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpandwm : GCCBuiltin<"__builtin_altivec_vexpandwm">,
+      Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpanddm : GCCBuiltin<"__builtin_altivec_vexpanddm">,
+      Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vexpandqm : GCCBuiltin<"__builtin_altivec_vexpandqm">,
+      Intrinsic<[llvm_v1i128_ty], [llvm_v1i128_ty], [IntrNoMem]>;
+
   // P10 Vector Parallel Bits Deposit/Extract Doubleword Builtins.
   def int_ppc_altivec_vpdepd : GCCBuiltin<"__builtin_altivec_vpdepd">,
       Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -586,6 +586,26 @@
                                       "vextractqm $rD, $vB", IIC_VecGeneral,
                                       [(set i32:$rD,
                                       (int_ppc_altivec_vextractqm v1i128:$vB))]>;
+  def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB),
+                                     "vexpandbm $vD, $vB", IIC_VecGeneral,
+                                     [(set v16i8:$vD,
+                                     (int_ppc_altivec_vexpandbm v16i8:$vB))]>;
+  def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB),
+                                     "vexpandhm $vD, $vB", IIC_VecGeneral,
+                                     [(set v8i16:$vD,
+                                     (int_ppc_altivec_vexpandhm v8i16:$vB))]>;
+  def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB),
+                                     "vexpandwm $vD, $vB", IIC_VecGeneral,
+                                     [(set v4i32:$vD,
+                                     (int_ppc_altivec_vexpandwm v4i32:$vB))]>;
+  def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB),
+                                     "vexpanddm $vD, $vB", IIC_VecGeneral,
+                                     [(set v2i64:$vD,
+                                     (int_ppc_altivec_vexpanddm v2i64:$vB))]>;
+  def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB),
+                                     "vexpandqm $vD, $vB", IIC_VecGeneral,
+                                     [(set v1i128:$vD,
+                                     (int_ppc_altivec_vexpandqm v1i128:$vB))]>;
   def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
                         "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
                         [(set v2i64:$vD,
Index: llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
+++ llvm/test/CodeGen/PowerPC/p10-vector-mask-ops.ll
@@ -61,3 +61,59 @@
 declare i32 @llvm.ppc.altivec.vextractwm(<4 x i32>)
 declare i32 @llvm.ppc.altivec.vextractdm(<2 x i64>)
 declare i32 @llvm.ppc.altivec.vextractqm(<1 x i128>)
+
+define <16 x i8> @test_vexpandbm(<16 x i8> %a) {
+; CHECK-LABEL: test_vexpandbm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandbm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8> %a)
+  ret <16 x i8> %exp
+}
+
+define <8 x i16> @test_vexpandhm(<8 x i16> %a) {
+; CHECK-LABEL: test_vexpandhm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandhm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16> %a)
+  ret <8 x i16> %exp
+}
+
+define <4 x i32> @test_vexpandwm(<4 x i32> %a) {
+; CHECK-LABEL: test_vexpandwm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandwm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <4 x i32> @llvm.ppc.altivec.vexpandwm(<4 x i32> %a)
+  ret <4 x i32> %exp
+}
+
+define <2 x i64> @test_vexpanddm(<2 x i64> %a) {
+; CHECK-LABEL: test_vexpanddm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpanddm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64> %a)
+  ret <2 x i64> %exp
+}
+
+define <1 x i128> @test_vexpandqm(<1 x i128> %a) {
+; CHECK-LABEL: test_vexpandqm:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vexpandqm v2, v2
+; CHECK-NEXT:    blr
+entry:
+  %exp = tail call <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128> %a)
+  ret <1 x i128> %exp
+}
+
+declare <16 x i8> @llvm.ppc.altivec.vexpandbm(<16 x i8>)
+declare <8 x i16> @llvm.ppc.altivec.vexpandhm(<8 x i16>)
+declare <4 x i32> @llvm.ppc.altivec.vexpandwm(<4 x i32>)
+declare <2 x i64> @llvm.ppc.altivec.vexpanddm(<2 x i64>)
+declare <1 x i128> @llvm.ppc.altivec.vexpandqm(<1 x i128>)
Index: llvm/test/MC/Disassembler/PowerPC/p10insts.txt
===================================================================
--- llvm/test/MC/Disassembler/PowerPC/p10insts.txt
+++ llvm/test/MC/Disassembler/PowerPC/p10insts.txt
@@ -16,6 +16,21 @@
 # CHECK: vextractqm 1, 2
 0x10 0x2c 0x16 0x42
 
+# CHECK: vexpandbm 1, 2
+0x10 0x20 0x16 0x42
+
+# CHECK: vexpandhm 1, 2
+0x10 0x21 0x16 0x42
+
+# CHECK: vexpandwm 1, 2
+0x10 0x22 0x16 0x42
+
+# CHECK: vexpanddm 1, 2
+0x10 0x23 0x16 0x42
+
+# CHECK: vexpandqm 1, 2
+0x10 0x24 0x16 0x42
+
 # CHECK: vpdepd 1, 2, 0
 0x10 0x22 0x05 0xcd
 
Index: llvm/test/MC/PowerPC/p10.s
===================================================================
--- llvm/test/MC/PowerPC/p10.s
+++ llvm/test/MC/PowerPC/p10.s
@@ -18,6 +18,21 @@
 # CHECK-BE: vextractqm 1, 2                       # encoding: [0x10,0x2c,0x16,0x42]
 # CHECK-LE: vextractqm 1, 2                       # encoding: [0x42,0x16,0x2c,0x10]
             vextractqm 1, 2
+# CHECK-BE: vexpandbm 1, 2                        # encoding: [0x10,0x20,0x16,0x42]
+# CHECK-LE: vexpandbm 1, 2                        # encoding: [0x42,0x16,0x20,0x10]
+            vexpandbm 1, 2
+# CHECK-BE: vexpandhm 1, 2                        # encoding: [0x10,0x21,0x16,0x42]
+# CHECK-LE: vexpandhm 1, 2                        # encoding: [0x42,0x16,0x21,0x10]
+            vexpandhm 1, 2
+# CHECK-BE: vexpandwm 1, 2                        # encoding: [0x10,0x22,0x16,0x42]
+# CHECK-LE: vexpandwm 1, 2                        # encoding: [0x42,0x16,0x22,0x10]
+            vexpandwm 1, 2
+# CHECK-BE: vexpanddm 1, 2                        # encoding: [0x10,0x23,0x16,0x42]
+# CHECK-LE: vexpanddm 1, 2                        # encoding: [0x42,0x16,0x23,0x10]
+            vexpanddm 1, 2
+# CHECK-BE: vexpandqm 1, 2                        # encoding: [0x10,0x24,0x16,0x42]
+# CHECK-LE: vexpandqm 1, 2                        # encoding: [0x42,0x16,0x24,0x10]
+            vexpandqm 1, 2
 # CHECK-BE: vpdepd 1, 2, 0                        # encoding: [0x10,0x22,0x05,0xcd]
 # CHECK-LE: vpdepd 1, 2, 0                        # encoding: [0xcd,0x05,0x22,0x10]
             vpdepd 1, 2, 0