diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -484,6 +484,9 @@
 BUILTIN(__builtin_vsx_xvcvsphp, "V4fV4f", "")
 BUILTIN(__builtin_vsx_xvcvhpsp, "V4fV8Us", "")
 
+BUILTIN(__builtin_vsx_xvcvspbf16, "V16UcV16Uc", "")
+BUILTIN(__builtin_vsx_xvcvbf16spn, "V16UcV16Uc", "")
+
 // Vector Test Data Class builtins
 BUILTIN(__builtin_vsx_xvtstdcdp, "V2ULLiV2dIi", "")
 BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "")
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/builtins-ppc-p10vector.c
--- a/clang/test/CodeGen/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -137,6 +137,18 @@
   return vec_mod(vulla, vullb);
 }
 
+vector unsigned char test_xvcvspbf16(vector unsigned char vc) {
+  // CHECK-LABEL: @test_xvcvspbf16(
+  // CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> [[VC:%.*]])
+  return __builtin_vsx_xvcvspbf16(vc);
+}
+
+vector unsigned char test_xvcvbf16spn(vector unsigned char vc) {
+  // CHECK-LABEL: @test_xvcvbf16spn(
+  // CHECK: [[TMP0:%.*]] = call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> [[VC:%.*]])
+  return __builtin_vsx_xvcvbf16spn(vc);
+}
+
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
   // CHECK-NEXT: ret <2 x i64>
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1124,6 +1124,12 @@
 def int_ppc_vsx_xvcvhpsp :
       PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty],
                             [llvm_v8i16_ty],[IntrNoMem]>;
+def int_ppc_vsx_xvcvspbf16 :
+      PowerPC_VSX_Intrinsic<"xvcvspbf16", [llvm_v16i8_ty],
+                            [llvm_v16i8_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvbf16spn :
+      PowerPC_VSX_Intrinsic<"xvcvbf16spn", [llvm_v16i8_ty],
+                            [llvm_v16i8_ty], [IntrNoMem]>;
 def int_ppc_vsx_xxextractuw :
       PowerPC_VSX_Intrinsic<"xxextractuw",[llvm_v2i64_ty],
                             [llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -515,6 +515,11 @@
 def PairedVectorMemops : Predicate<"PPCSubTarget->pairedVectorMemops()">;
 def MMA : Predicate<"PPCSubTarget->hasMMA()">;
 
+def RCCp {
+  dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
+  dag BToVSRC = (COPY_TO_REGCLASS $XB, VSRC);
+}
+
 let Predicates = [PrefixInstrs] in {
   let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
     defm PADDI8 :
@@ -1351,6 +1356,13 @@
             (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
 }
 
+let Predicates = [IsISA3_1, HasVSX] in {
+  def : Pat<(v16i8 (int_ppc_vsx_xvcvspbf16 v16i8:$XA)),
+            (COPY_TO_REGCLASS (XVCVSPBF16 RCCp.AToVSRC), VRRC)>;
+  def : Pat<(v16i8 (int_ppc_vsx_xvcvbf16spn v16i8:$XA)),
+            (COPY_TO_REGCLASS (XVCVBF16SPN RCCp.AToVSRC), VRRC)>;
+}
+
 let AddedComplexity = 400, Predicates = [IsISA3_1] in {
   def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$rS, 0)), xoaddr:$src),
             (STXVRBX (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$src)>;
diff --git a/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll b/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/bfloat16-outer-product.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+; Function Attrs: nofree nounwind writeonly
+define dso_local void @test60(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
+; CHECK-LABEL: test60:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvcvspbf16 vs0, v2
+; CHECK-NEXT:    stxv vs0, 0(r7)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test60:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvspbf16 vs0, v2
+; CHECK-BE-NEXT:    stxv vs0, 0(r7)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8> %vc)
+  %1 = bitcast i8* %resp to <16 x i8>*
+  store <16 x i8> %0, <16 x i8>* %1, align 16
+  ret void
+}
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.vsx.xvcvspbf16(<16 x i8>)
+
+; Function Attrs: nofree nounwind writeonly
+define dso_local void @test61(i8* nocapture readnone %vqp, i8* nocapture readnone %vpp, <16 x i8> %vc, i8* nocapture %resp) {
+; CHECK-LABEL: test61:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvcvbf16spn vs0, v2
+; CHECK-NEXT:    stxv vs0, 0(r7)
+; CHECK-NEXT:    blr
+;
+; CHECK-BE-LABEL: test61:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xvcvbf16spn vs0, v2
+; CHECK-BE-NEXT:    stxv vs0, 0(r7)
+; CHECK-BE-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8> %vc)
+  %1 = bitcast i8* %resp to <16 x i8>*
+  store <16 x i8> %0, <16 x i8>* %1, align 16
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare <16 x i8> @llvm.ppc.vsx.xvcvbf16spn(<16 x i8>)
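Usage note (not part of the patch): a minimal sketch of how client code could exercise the two new Clang builtins once this lands. The wrapper names and the round-trip structure below are illustrative assumptions; only __builtin_vsx_xvcvspbf16 and __builtin_vsx_xvcvbf16spn come from the patch, both taking and returning vector unsigned char, and they need a Power10 target (e.g. clang -mcpu=pwr10), matching the IsISA3_1/HasVSX predicates that guard the new patterns.

#include <altivec.h>

// Hypothetical helper: narrow the 32-bit single-precision lanes held in the
// 16-byte vector to bfloat16 via the new xvcvspbf16 builtin.
static vector unsigned char pack_sp_to_bf16(vector unsigned char sp_bits) {
  return __builtin_vsx_xvcvspbf16(sp_bits);
}

// Hypothetical helper: widen the packed bfloat16 lanes back to single
// precision (non-signaling form) via the new xvcvbf16spn builtin.
static vector unsigned char unpack_bf16_to_sp(vector unsigned char bf16_bits) {
  return __builtin_vsx_xvcvbf16spn(bf16_bits);
}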